Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
5998d3cc
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
5998d3cc
编写于
2月 19, 2019
作者:
L
lujun
提交者:
GitHub
2月 19, 2019
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request
#8
from PaddlePaddle/develop
merge to local
上级
d64abc85
a661d0bd
变更
101
显示空白变更内容
内联
并排
Showing
101 changed file
with
3062 addition
and
765 deletion
+3062
-765
cmake/external/protobuf.cmake
cmake/external/protobuf.cmake
+2
-2
cmake/external/python.cmake
cmake/external/python.cmake
+2
-2
paddle/fluid/API.spec
paddle/fluid/API.spec
+4
-4
paddle/fluid/framework/details/CMakeLists.txt
paddle/fluid/framework/details/CMakeLists.txt
+6
-1
paddle/fluid/framework/details/build_strategy.cc
paddle/fluid/framework/details/build_strategy.cc
+2
-0
paddle/fluid/framework/details/inplace_op_pass.cc
paddle/fluid/framework/details/inplace_op_pass.cc
+1
-1
paddle/fluid/framework/details/memory_optimize_helper.cc
paddle/fluid/framework/details/memory_optimize_helper.cc
+66
-6
paddle/fluid/framework/details/memory_optimize_helper.h
paddle/fluid/framework/details/memory_optimize_helper.h
+2
-0
paddle/fluid/framework/details/memory_optimize_helper_test.cc
...le/fluid/framework/details/memory_optimize_helper_test.cc
+46
-0
paddle/fluid/framework/details/memory_optimize_pass.cc
paddle/fluid/framework/details/memory_optimize_pass.cc
+57
-51
paddle/fluid/framework/inplace_op_inference_test.cc
paddle/fluid/framework/inplace_op_inference_test.cc
+16
-16
paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc
paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc
+7
-3
paddle/fluid/imperative/layer.cc
paddle/fluid/imperative/layer.cc
+1
-1
paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc
...e/fluid/inference/analysis/ir_passes/subgraph_detector.cc
+0
-71
paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h
...le/fluid/inference/analysis/ir_passes/subgraph_detector.h
+1
-26
paddle/fluid/operators/controlflow/compare_op.cc
paddle/fluid/operators/controlflow/compare_op.cc
+5
-5
paddle/fluid/operators/detection/density_prior_box_op.h
paddle/fluid/operators/detection/density_prior_box_op.h
+6
-7
paddle/fluid/operators/detection/prior_box_op.h
paddle/fluid/operators/detection/prior_box_op.h
+30
-39
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
+14
-21
paddle/fluid/operators/group_norm_op.cc
paddle/fluid/operators/group_norm_op.cc
+37
-2
paddle/fluid/operators/jit/benchmark.cc
paddle/fluid/operators/jit/benchmark.cc
+111
-0
paddle/fluid/operators/jit/gen/CMakeLists.txt
paddle/fluid/operators/jit/gen/CMakeLists.txt
+1
-0
paddle/fluid/operators/jit/gen/embseqpool.cc
paddle/fluid/operators/jit/gen/embseqpool.cc
+149
-0
paddle/fluid/operators/jit/gen/embseqpool.h
paddle/fluid/operators/jit/gen/embseqpool.h
+81
-0
paddle/fluid/operators/jit/gen/seqpool.h
paddle/fluid/operators/jit/gen/seqpool.h
+1
-1
paddle/fluid/operators/jit/helper.cc
paddle/fluid/operators/jit/helper.cc
+1
-0
paddle/fluid/operators/jit/helper.h
paddle/fluid/operators/jit/helper.h
+9
-0
paddle/fluid/operators/jit/kernel_base.h
paddle/fluid/operators/jit/kernel_base.h
+47
-19
paddle/fluid/operators/jit/kernel_key.cc
paddle/fluid/operators/jit/kernel_key.cc
+5
-0
paddle/fluid/operators/jit/more/mkl/CMakeLists.txt
paddle/fluid/operators/jit/more/mkl/CMakeLists.txt
+1
-0
paddle/fluid/operators/jit/more/mkl/mkl.cc
paddle/fluid/operators/jit/more/mkl/mkl.cc
+11
-0
paddle/fluid/operators/jit/more/mkl/mkl.h
paddle/fluid/operators/jit/more/mkl/mkl.h
+29
-0
paddle/fluid/operators/jit/refer/CMakeLists.txt
paddle/fluid/operators/jit/refer/CMakeLists.txt
+1
-0
paddle/fluid/operators/jit/refer/refer.cc
paddle/fluid/operators/jit/refer/refer.cc
+2
-0
paddle/fluid/operators/jit/refer/refer.h
paddle/fluid/operators/jit/refer/refer.h
+34
-0
paddle/fluid/operators/jit/test.cc
paddle/fluid/operators/jit/test.cc
+197
-1
paddle/fluid/operators/load_combine_op.cc
paddle/fluid/operators/load_combine_op.cc
+5
-1
paddle/fluid/operators/lstm_op.h
paddle/fluid/operators/lstm_op.h
+4
-0
paddle/fluid/operators/lstmp_op.h
paddle/fluid/operators/lstmp_op.h
+5
-0
paddle/fluid/operators/ngraph/ngraph_bridge.cc
paddle/fluid/operators/ngraph/ngraph_bridge.cc
+2
-0
paddle/fluid/operators/ngraph/ngraph_ops.h
paddle/fluid/operators/ngraph/ngraph_ops.h
+1
-0
paddle/fluid/operators/ngraph/ops/batch_norm_op.h
paddle/fluid/operators/ngraph/ops/batch_norm_op.h
+7
-0
paddle/fluid/operators/ngraph/ops/cross_entropy_op.h
paddle/fluid/operators/ngraph/ops/cross_entropy_op.h
+145
-0
paddle/fluid/operators/ngraph/ops/fill_constant_op.h
paddle/fluid/operators/ngraph/ops/fill_constant_op.h
+0
-2
paddle/fluid/operators/row_conv_op.cc
paddle/fluid/operators/row_conv_op.cc
+5
-5
paddle/fluid/platform/enforce.h
paddle/fluid/platform/enforce.h
+17
-13
paddle/fluid/pybind/ir.cc
paddle/fluid/pybind/ir.cc
+32
-26
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+97
-23
paddle/scripts/paddle_build.sh
paddle/scripts/paddle_build.sh
+16
-0
python/CMakeLists.txt
python/CMakeLists.txt
+1
-0
python/paddle/fluid/compiler.py
python/paddle/fluid/compiler.py
+4
-1
python/paddle/fluid/contrib/int8_inference/README.md
python/paddle/fluid/contrib/int8_inference/README.md
+2
-2
python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
...ddle/fluid/contrib/slim/quantization/quantization_pass.py
+377
-17
python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
+6
-0
python/paddle/fluid/contrib/slim/tests/__init__.py
python/paddle/fluid/contrib/slim/tests/__init__.py
+0
-0
python/paddle/fluid/contrib/slim/tests/configs/config.yaml
python/paddle/fluid/contrib/slim/tests/configs/config.yaml
+1
-1
python/paddle/fluid/contrib/slim/tests/configs/pruners.yaml
python/paddle/fluid/contrib/slim/tests/configs/pruners.yaml
+0
-0
python/paddle/fluid/contrib/slim/tests/configs/pruners_0.yaml
...on/paddle/fluid/contrib/slim/tests/configs/pruners_0.yaml
+0
-0
python/paddle/fluid/contrib/slim/tests/test_factory.py
python/paddle/fluid/contrib/slim/tests/test_factory.py
+1
-1
python/paddle/fluid/contrib/slim/tests/test_graph.py
python/paddle/fluid/contrib/slim/tests/test_graph.py
+80
-0
python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py
...paddle/fluid/contrib/slim/tests/test_quantization_pass.py
+372
-0
python/paddle/fluid/contrib/tests/CMakeLists.txt
python/paddle/fluid/contrib/tests/CMakeLists.txt
+5
-1
python/paddle/fluid/contrib/tests/test_calibration.py
python/paddle/fluid/contrib/tests/test_calibration.py
+0
-4
python/paddle/fluid/contrib/tests/test_quantize_transpiler.py
...on/paddle/fluid/contrib/tests/test_quantize_transpiler.py
+5
-3
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+224
-25
python/paddle/fluid/imperative/layers.py
python/paddle/fluid/imperative/layers.py
+94
-19
python/paddle/fluid/imperative/nn.py
python/paddle/fluid/imperative/nn.py
+0
-6
python/paddle/fluid/layers/control_flow.py
python/paddle/fluid/layers/control_flow.py
+8
-12
python/paddle/fluid/layers/io.py
python/paddle/fluid/layers/io.py
+4
-1
python/paddle/fluid/layers/layer_function_generator.py
python/paddle/fluid/layers/layer_function_generator.py
+6
-2
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+45
-8
python/paddle/fluid/layers/ops.py
python/paddle/fluid/layers/ops.py
+2
-2
python/paddle/fluid/layers/tensor.py
python/paddle/fluid/layers/tensor.py
+5
-1
python/paddle/fluid/optimizer.py
python/paddle/fluid/optimizer.py
+3
-3
python/paddle/fluid/parallel_executor.py
python/paddle/fluid/parallel_executor.py
+2
-0
python/paddle/fluid/tests/unittests/CMakeLists.txt
python/paddle/fluid/tests/unittests/CMakeLists.txt
+4
-0
python/paddle/fluid/tests/unittests/ngraph/test_accuracy_ngraph_op.py
...e/fluid/tests/unittests/ngraph/test_accuracy_ngraph_op.py
+1
-33
python/paddle/fluid/tests/unittests/ngraph/test_batch_norm_ngraph_op.py
...fluid/tests/unittests/ngraph/test_batch_norm_ngraph_op.py
+0
-16
python/paddle/fluid/tests/unittests/ngraph/test_conv2d_ngraph_op.py
...dle/fluid/tests/unittests/ngraph/test_conv2d_ngraph_op.py
+0
-55
python/paddle/fluid/tests/unittests/ngraph/test_cross_entropy_ngraph_op.py
...id/tests/unittests/ngraph/test_cross_entropy_ngraph_op.py
+275
-0
python/paddle/fluid/tests/unittests/ngraph/test_elementwise_add_ngraph_op.py
.../tests/unittests/ngraph/test_elementwise_add_ngraph_op.py
+2
-11
python/paddle/fluid/tests/unittests/ngraph/test_fill_constant_ngraph_op.py
...id/tests/unittests/ngraph/test_fill_constant_ngraph_op.py
+17
-7
python/paddle/fluid/tests/unittests/ngraph/test_mean_ngraph_op.py
...addle/fluid/tests/unittests/ngraph/test_mean_ngraph_op.py
+0
-7
python/paddle/fluid/tests/unittests/ngraph/test_mul_ngraph_op.py
...paddle/fluid/tests/unittests/ngraph/test_mul_ngraph_op.py
+1
-33
python/paddle/fluid/tests/unittests/ngraph/test_pool2d_ngraph_op.py
...dle/fluid/tests/unittests/ngraph/test_pool2d_ngraph_op.py
+10
-46
python/paddle/fluid/tests/unittests/ngraph/test_scale_ngraph_op.py
...ddle/fluid/tests/unittests/ngraph/test_scale_ngraph_op.py
+0
-19
python/paddle/fluid/tests/unittests/ngraph/test_softmax_ngraph_op.py
...le/fluid/tests/unittests/ngraph/test_softmax_ngraph_op.py
+0
-6
python/paddle/fluid/tests/unittests/ngraph/test_top_k_ngraph_op.py
...ddle/fluid/tests/unittests/ngraph/test_top_k_ngraph_op.py
+0
-25
python/paddle/fluid/tests/unittests/op_test.py
python/paddle/fluid/tests/unittests/op_test.py
+4
-0
python/paddle/fluid/tests/unittests/parallel_executor_test_base.py
...ddle/fluid/tests/unittests/parallel_executor_test_base.py
+1
-1
python/paddle/fluid/tests/unittests/test_base_layer.py
python/paddle/fluid/tests/unittests/test_base_layer.py
+82
-0
python/paddle/fluid/tests/unittests/test_dist_transpiler.py
python/paddle/fluid/tests/unittests/test_dist_transpiler.py
+10
-0
python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py
...e/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py
+4
-0
python/paddle/fluid/tests/unittests/test_imperative.py
python/paddle/fluid/tests/unittests/test_imperative.py
+12
-0
python/paddle/fluid/tests/unittests/test_imperative_gan.py
python/paddle/fluid/tests/unittests/test_imperative_gan.py
+0
-7
python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
...n/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
+0
-16
python/paddle/fluid/tests/unittests/test_imperative_resnet.py
...on/paddle/fluid/tests/unittests/test_imperative_resnet.py
+12
-14
python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py
...id/tests/unittests/test_ir_memory_optimize_transformer.py
+48
-0
python/requirements.txt
python/requirements.txt
+1
-1
tools/manylinux1/Dockerfile.x64
tools/manylinux1/Dockerfile.x64
+4
-4
tools/manylinux1/build_scripts/build.sh
tools/manylinux1/build_scripts/build.sh
+6
-6
未找到文件。
cmake/external/protobuf.cmake
浏览文件 @
5998d3cc
...
...
@@ -203,7 +203,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
ENDIF
()
SET
(
PROTOBUF_REPO
"https://github.com/google/protobuf.git"
)
SET
(
PROTOBUF_TAG
"
9f75c5aa851cd877fb0d93ccc31b8567a6706546
"
)
SET
(
PROTOBUF_TAG
"
v3.6.1
"
)
ExternalProject_Add
(
${
TARGET_NAME
}
...
...
@@ -231,7 +231,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
)
ENDFUNCTION
()
SET
(
PROTOBUF_VERSION 3.1
)
SET
(
PROTOBUF_VERSION 3.
6.
1
)
IF
(
NOT PROTOBUF_FOUND
)
build_protobuf
(
extern_protobuf FALSE
)
...
...
cmake/external/python.cmake
浏览文件 @
5998d3cc
...
...
@@ -74,8 +74,8 @@ IF(PYTHONINTERP_FOUND)
find_python_module
(
wheel REQUIRED
)
find_python_module
(
google.protobuf REQUIRED
)
FIND_PACKAGE
(
NumPy REQUIRED
)
IF
(
${
PY_GOOGLE.PROTOBUF_VERSION
}
AND
${
PY_GOOGLE.PROTOBUF_VERSION
}
VERSION_LESS
"3.
0.0
"
)
MESSAGE
(
FATAL_ERROR
"Found Python Protobuf
${
PY_GOOGLE.PROTOBUF_VERSION
}
< 3.
0.0
, "
IF
(
${
PY_GOOGLE.PROTOBUF_VERSION
}
AND
${
PY_GOOGLE.PROTOBUF_VERSION
}
VERSION_LESS
"3.
6.1
"
)
MESSAGE
(
FATAL_ERROR
"Found Python Protobuf
${
PY_GOOGLE.PROTOBUF_VERSION
}
< 3.
6.1
, "
"please use pip to upgrade protobuf. pip install -U protobuf"
)
ENDIF
()
ENDIF
(
PYTHONINTERP_FOUND
)
...
...
paddle/fluid/API.spec
浏览文件 @
5998d3cc
...
...
@@ -261,7 +261,7 @@ paddle.fluid.layers.increment ArgSpec(args=['x', 'value', 'in_place'], varargs=N
paddle.fluid.layers.array_write ArgSpec(args=['x', 'i', 'array'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.create_array ArgSpec(args=['dtype'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.less_than ArgSpec(args=['x', 'y', 'force_cpu', 'cond'], varargs=None, keywords='ignored', defaults=(None, None))
paddle.fluid.layers.equal ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=
'ignored'
, defaults=(None,))
paddle.fluid.layers.equal ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=
None
, defaults=(None,))
paddle.fluid.layers.array_read ArgSpec(args=['array', 'i'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.array_length ArgSpec(args=['array'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.IfElse.__init__ ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,))
...
...
@@ -473,11 +473,11 @@ paddle.fluid.LoDTensor.has_valid_recursive_sequence_lengths has_valid_recursive_
paddle.fluid.LoDTensor.lod lod(self: paddle.fluid.core.LoDTensor) -> List[List[int]]
paddle.fluid.LoDTensor.recursive_sequence_lengths recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor) -> List[List[int]]
paddle.fluid.LoDTensor.set 1. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CPUPlace) -> None 2. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CPUPlace) -> None 3. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CPUPlace) -> None 4. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CPUPlace) -> None 5. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CPUPlace) -> None 6. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CPUPlace) -> None 7. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CPUPlace) -> None 8. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int8], arg1: paddle::platform::CPUPlace) -> None 9. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CUDAPlace) -> None 10. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CUDAPlace) -> None 11. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CUDAPlace) -> None 12. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CUDAPlace) -> None 13. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CUDAPlace) -> None 14. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CUDAPlace) -> None 15. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CUDAPlace) -> None 16. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int8], arg1: paddle::platform::CUDAPlace) -> None 17. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CUDAPinnedPlace) -> None 18. 
set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CUDAPinnedPlace) -> None 19. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CUDAPinnedPlace) -> None 20. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CUDAPinnedPlace) -> None 21. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CUDAPinnedPlace) -> None 22. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CUDAPinnedPlace) -> None 23. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CUDAPinnedPlace) -> None 24. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int8], arg1: paddle::platform::CUDAPinnedPlace) -> None
paddle.fluid.LoDTensor.set_lod set_lod(self: paddle.fluid.core.LoDTensor,
arg0
: List[List[int]]) -> None
paddle.fluid.LoDTensor.set_recursive_sequence_lengths set_recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor,
arg0
: List[List[int]]) -> None
paddle.fluid.LoDTensor.set_lod set_lod(self: paddle.fluid.core.LoDTensor,
lod
: List[List[int]]) -> None
paddle.fluid.LoDTensor.set_recursive_sequence_lengths set_recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor,
recursive_sequence_lengths
: List[List[int]]) -> None
paddle.fluid.LoDTensor.shape shape(self: paddle.fluid.core.Tensor) -> List[int]
paddle.fluid.LoDTensorArray.__init__ __init__(self: paddle.fluid.core.LoDTensorArray) -> None
paddle.fluid.LoDTensorArray.append append(self: paddle.fluid.core.LoDTensorArray,
arg0
: paddle.fluid.core.LoDTensor) -> None
paddle.fluid.LoDTensorArray.append append(self: paddle.fluid.core.LoDTensorArray,
tensor
: paddle.fluid.core.LoDTensor) -> None
paddle.fluid.CPUPlace.__init__ __init__(self: paddle.fluid.core.CPUPlace) -> None
paddle.fluid.CUDAPlace.__init__ __init__(self: paddle.fluid.core.CUDAPlace, arg0: int) -> None
paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core.CUDAPinnedPlace) -> None
...
...
paddle/fluid/framework/details/CMakeLists.txt
浏览文件 @
5998d3cc
...
...
@@ -50,7 +50,12 @@ cc_library(data_balance_op_handle SRCS data_balance_op_handle.cc DEPS op_handle_
cc_library
(
gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor
)
cc_library
(
fuse_vars_op_handle SRCS fuse_vars_op_handle.cc DEPS op_handle_base scope
)
cc_library
(
memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper
)
if
(
WITH_GPU
)
cc_library
(
memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper gpu_info
)
else
()
cc_library
(
memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper cpu_info
)
endif
()
cc_library
(
memory_optimize_pass SRCS memory_optimize_pass.cc DEPS memory_optimize_helper pass
)
cc_library
(
inplace_op_pass SRCS inplace_op_pass.cc DEPS memory_optimize_pass op_info
)
cc_library
(
modify_op_lock_and_record_event_pass SRCS modify_op_lock_and_record_event_pass.cc DEPS computation_op_handle op_graph_view multi_devices_helper
)
...
...
paddle/fluid/framework/details/build_strategy.cc
浏览文件 @
5998d3cc
...
...
@@ -240,7 +240,9 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
continue
;
}
}
VLOG
(
3
)
<<
"Start Apply Pass "
<<
pass
->
Type
();
graph
=
pass
->
Apply
(
std
::
move
(
graph
));
VLOG
(
3
)
<<
"Finish Apply Pass "
<<
pass
->
Type
();
}
return
graph
;
}
...
...
paddle/fluid/framework/details/inplace_op_pass.cc
浏览文件 @
5998d3cc
...
...
@@ -49,7 +49,7 @@ DEFINE_bool(
"If this option turns on, only these op in whitelist can be inplaced."
"If it turns off, all of the running op can be candidate of inplaced op."
"Such as scale, elementwise_add"
"By default, it's turned o
n
"
);
"By default, it's turned o
ff
"
);
DECLARE_string
(
memory_optimize_debug
);
...
...
paddle/fluid/framework/details/memory_optimize_helper.cc
浏览文件 @
5998d3cc
...
...
@@ -13,13 +13,19 @@
// limitations under the License.
#include "paddle/fluid/framework/details/memory_optimize_helper.h"
#include <algorithm>
#include <deque>
#include <functional>
#include <i
ostream
>
#include <i
terator
>
#include <numeric>
#include <sstream>
#include <string>
#include "paddle/fluid/framework/var_desc.h"
#include "paddle/fluid/platform/cpu_info.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/gpu_info.h"
#endif // PADDLE_WITH_CUDA
namespace
paddle
{
namespace
framework
{
...
...
@@ -166,6 +172,11 @@ struct NodeComparator {
bool
operator
()(
ir
::
Node
*
lhs
,
ir
::
Node
*
rhs
)
const
{
auto
*
lhs_desc
=
FindVarDescInBlock
(
lhs
);
auto
*
rhs_desc
=
FindVarDescInBlock
(
rhs
);
// match data type
if
(
lhs_desc
->
GetDataType
()
!=
rhs_desc
->
GetDataType
())
{
return
false
;
}
// match shape
auto
lhs_shape
=
lhs_desc
->
GetShape
();
auto
rhs_shape
=
rhs_desc
->
GetShape
();
if
((
lhs_shape
[
0
]
==
-
1
&&
rhs_shape
[
0
]
==
-
1
)
||
...
...
@@ -230,6 +241,27 @@ ir::Node* OrderedSet::FindBestFitNode(ir::Node* var) const {
return
found_node
;
}
ir
::
Node
*
OrderedSet
::
FindNextBestFitNode
(
ir
::
Node
*
var
,
ir
::
Node
*
prev
)
const
{
ir
::
Node
*
found_node
=
nullptr
;
NodeComparator
functor
;
auto
it
=
std
::
find_if
(
nodes_
.
begin
(),
nodes_
.
end
(),
[
&
](
const
NodeVector
&
v
)
{
if
(
v
.
front
()
==
prev
)
return
true
;
else
return
false
;
});
PADDLE_ENFORCE
(
it
!=
nodes_
.
end
(),
"Not found previous in node list!"
);
for
(
it
=
std
::
next
(
it
);
it
!=
nodes_
.
end
();
++
it
)
{
auto
&
candidate
=
it
->
front
();
if
(
functor
(
var
,
candidate
))
{
found_node
=
candidate
;
break
;
}
}
return
found_node
;
}
bool
OrderedSet
::
Has
(
ir
::
Node
*
var
)
const
{
if
(
mark_table_
.
count
(
var
->
Name
()))
{
auto
&
node_in_samename
=
mark_table_
.
at
(
var
->
Name
());
...
...
@@ -241,10 +273,15 @@ bool OrderedSet::Has(ir::Node* var) const {
return
false
;
}
void
OrderedSet
::
Erase
(
const
std
::
string
&
var
)
{
PADDLE_ENFORCE
(
mark_table_
.
count
(
var
));
nodes_
.
erase
(
mark_table_
[
var
]);
mark_table_
.
erase
(
var
);
}
void
OrderedSet
::
Erase
(
ir
::
Node
*
var
)
{
PADDLE_ENFORCE
(
mark_table_
.
count
(
var
->
Name
()));
nodes_
.
erase
(
mark_table_
[
var
->
Name
()]);
mark_table_
.
erase
(
var
->
Name
());
PADDLE_ENFORCE
(
var
!=
nullptr
);
Erase
(
var
->
Name
());
}
std
::
string
OrderedSet
::
ToString
()
const
{
...
...
@@ -274,14 +311,35 @@ bool NodeCanReused(ir::Node* node) {
return
flag
;
}
int
MinChunkSize
()
{
int
size
{
0
};
#ifdef PADDLE_WITH_CUDA
size
=
platform
::
GpuMinChunkSize
();
#else
size
=
platform
::
CpuMinChunkSize
();
#endif // PADDLE_WITH_CUDA
return
size
;
}
bool
NodeCanReused
(
const
VarDesc
&
node
)
{
auto
type
=
node
.
GetType
();
// only these types holds bulk of gpu memory
if
(
!
(
type
==
proto
::
VarType
::
LOD_TENSOR
||
type
==
proto
::
VarType
::
SELECTED_ROWS
||
type
==
proto
::
VarType
::
LOD_TENSOR_ARRAY
))
{
return
false
;
}
if
(
node
.
Persistable
()
||
node
.
GetShape
().
empty
())
{
// persistable variable is parameter
if
(
node
.
Persistable
())
{
return
false
;
}
// shape < min_chunk_size is meaningless.
// further more, fetched loss always has size = 1
// which should not be reused.
auto
shape
=
node
.
GetShape
();
int
size
=
std
::
abs
(
std
::
accumulate
(
shape
.
begin
(),
shape
.
end
(),
1
,
std
::
multiplies
<
int
>
()));
if
(
shape
.
empty
()
||
size
<
MinChunkSize
())
{
return
false
;
}
// vars can be @EMPTY@, @LR_DECAY_REUSE_ID@. For example, while_grad
...
...
@@ -461,7 +519,9 @@ ir::Node* ControlFlowGraph::GetNodeByName(const std::string& name,
for
(
auto
*
node
:
ops_
)
{
if
(
node
==
op
)
break
;
for
(
auto
&
output
:
node
->
outputs
)
{
if
(
output
->
Name
()
==
name
)
{
PADDLE_ENFORCE
((
output
!=
nullptr
&&
output
->
IsVar
()),
"Output is empty!"
);
if
(
output
->
Var
()
&&
output
->
Name
()
==
name
)
{
found_node
=
output
;
}
}
...
...
paddle/fluid/framework/details/memory_optimize_helper.h
浏览文件 @
5998d3cc
...
...
@@ -55,6 +55,7 @@ class OrderedSet {
void
Insert
(
ir
::
Node
*
var
);
void
Erase
(
ir
::
Node
*
var
);
void
Erase
(
const
std
::
string
&
var
);
bool
Has
(
ir
::
Node
*
var
)
const
;
void
Clear
()
{
mark_table_
.
clear
();
...
...
@@ -62,6 +63,7 @@ class OrderedSet {
}
// find the bestfit shape node block with var.
ir
::
Node
*
FindBestFitNode
(
ir
::
Node
*
var
)
const
;
ir
::
Node
*
FindNextBestFitNode
(
ir
::
Node
*
var
,
ir
::
Node
*
prev
)
const
;
// map store non-const iterator, can not promise const
int
GetNodeIndexInPool
(
ir
::
Node
*
var
);
// pool all node to string
...
...
paddle/fluid/framework/details/memory_optimize_helper_test.cc
浏览文件 @
5998d3cc
...
...
@@ -107,6 +107,52 @@ TEST(OrderedSet, Normal) {
ASSERT_EQ
(
pool
.
GetNodeIndexInPool
(
cache
),
5
);
// match 4:[5,2]
}
}
TEST
(
OrderedSet
,
FindBestFitNode
)
{
OrderedSet
pool
;
std
::
vector
<
std
::
unique_ptr
<
ir
::
Node
>>
nodes
;
ProgramDesc
prog
;
BlockDesc
*
block_desc
=
prog
.
MutableBlock
(
0
);
auto
*
op_desc
=
block_desc
->
AppendOp
();
op_desc
->
SetType
(
"dummy"
);
std
::
unique_ptr
<
ir
::
Node
>
op
=
ir
::
CreateNodeForTest
(
op_desc
);
{
auto
desc
=
block_desc
->
Var
(
"a"
);
desc
->
SetShape
({
128
,
128
});
std
::
unique_ptr
<
ir
::
Node
>
node
=
ir
::
CreateNodeForTest
(
desc
);
node
->
inputs
.
emplace_back
(
op
.
get
());
nodes
.
emplace_back
(
std
::
move
(
node
));
}
{
auto
desc
=
block_desc
->
Var
(
"b"
);
desc
->
SetShape
({
128
,
129
});
std
::
unique_ptr
<
ir
::
Node
>
node
=
ir
::
CreateNodeForTest
(
desc
);
node
->
inputs
.
emplace_back
(
op
.
get
());
nodes
.
emplace_back
(
std
::
move
(
node
));
}
{
auto
desc
=
block_desc
->
Var
(
"c"
);
desc
->
SetShape
({
128
,
128
});
std
::
unique_ptr
<
ir
::
Node
>
node
=
ir
::
CreateNodeForTest
(
desc
);
node
->
inputs
.
emplace_back
(
op
.
get
());
nodes
.
emplace_back
(
std
::
move
(
node
));
}
for
(
auto
&
node
:
nodes
)
{
pool
.
Insert
(
node
.
get
());
}
// FindNextBestFitNode
auto
*
n
=
nodes
[
0
].
get
();
auto
*
cache
=
pool
.
FindBestFitNode
(
n
);
PADDLE_ENFORCE
(
cache
->
Name
()
==
"a"
);
cache
=
pool
.
FindNextBestFitNode
(
n
,
cache
);
PADDLE_ENFORCE
(
cache
->
Name
()
==
"c"
);
cache
=
pool
.
FindNextBestFitNode
(
n
,
cache
);
PADDLE_ENFORCE
(
cache
->
Name
()
==
"b"
);
}
}
// namespace details
}
// namespace framework
}
// namespace paddle
...
...
paddle/fluid/framework/details/memory_optimize_pass.cc
浏览文件 @
5998d3cc
...
...
@@ -69,11 +69,20 @@ std::unique_ptr<ir::Graph> MemoryOptimizePass::ApplyImpl(
}
for
(
auto
&
var
:
op
->
outputs
)
{
if
(
!
NodeCanReused
(
var
)
||
cfg_
->
Use
(
op
).
count
(
var
->
Name
())
==
0
||
skip_set_
.
count
(
var
->
Name
()))
if
(
var
->
IsVar
()
&&
!
var
->
IsCtrlVar
()
&&
skip_set_
.
count
(
var
->
Name
()))
{
VLOG
(
3
)
<<
"Skip set contains variable of "
<<
var
->
Name
()
<<
"disable reuse on it. skipped"
;
continue
;
}
if
(
NodeCanReused
(
var
)
&&
cfg_
->
Use
(
op
).
count
(
var
->
Name
())
==
0
)
{
ir
::
Node
*
cache
=
pool_
.
FindBestFitNode
(
var
);
while
(
cache
!=
nullptr
&&
var
->
Name
()
==
cache
->
Name
())
{
VLOG
(
3
)
<<
"The same cache variable is cascade reused. "
<<
cache
->
Name
()
<<
" is re-filled to the pool after "
<<
"the reused op is finished. Current op can not "
<<
"replace it again. Skip this candidate."
;
cache
=
pool_
.
FindNextBestFitNode
(
var
,
cache
);
}
if
(
var
->
Name
()
==
FLAGS_memory_optimize_debug
)
{
VLOG
(
3
)
<<
"start match var "
<<
DebugString
(
var
)
<<
" of op "
<<
op
->
Name
();
...
...
@@ -82,42 +91,37 @@ std::unique_ptr<ir::Graph> MemoryOptimizePass::ApplyImpl(
<<
((
cache
==
nullptr
)
?
"False"
:
"True"
);
}
if
(
cache
==
nullptr
)
continue
;
if
(
var
->
Name
()
==
cache
->
Name
())
{
VLOG
(
3
)
<<
"The same cache variable is cascade reused."
<<
var
->
Name
()
<<
" is re-filled to the pool after"
<<
"the reused op is finished. Current op can not "
<<
"replace it again. Skip this candidate."
;
continue
;
if
(
cache
!=
nullptr
)
{
int
node_idx_in_pool
=
pool_
.
GetNodeIndexInPool
(
cache
);
VLOG
(
3
)
<<
string
::
Sprintf
(
"!!! %s, %s => %s, cache idx %d, pool size %d"
,
std
::
to_string
(
reuse_id
++
),
DebugString
(
var
),
DebugString
(
cache
),
node_idx_in_pool
,
static_cast
<
int
>
(
pool_
.
size
()));
// NOTE(dzhwinter): update the ProgramDesc/IR Graph
// and the CFG Graph on the fly.
//
// IR Graph define the dependence relationship between nodes.
//
// ProgramDesc defines the input/output vars. Its used in
// CreateOp, CreateVar when running happens.
//
// CFG Graph store the liveness information, when reuse happens
// we also need to update the variable liveness.
const
std
::
string
var_name
=
var
->
Name
();
const
std
::
string
cache_name
=
cache
->
Name
();
// update CFG Graph on the fly.
// reused var maybe re-fill into the pool
cfg_
->
RenameVarInCFGGraph
(
var
->
Name
(),
cache
->
Name
(),
idx
);
// NOTE(dzhwinter): we need to both update the ProgramDesc
// and IR Graph. because op_desc/var_desc is used in CreateOp,
// CreateVar when running happens. But IR Graph
// define the dependence relationship between nodes.
RenameVarInGraphDesc
(
var
->
Name
(),
cache
->
Name
(),
idx
);
RenameVarInGraphNode
(
var
->
Name
(),
cache
->
Name
(),
idx
,
graph
.
get
());
pool_
.
Erase
(
cache
);
cfg_
->
RenameVarInCFGGraph
(
var_name
,
cache_name
,
idx
);
RenameVarInGraphDesc
(
var_name
,
cache_name
,
idx
);
RenameVarInGraphNode
(
var_name
,
cache_name
,
idx
,
graph
.
get
());
pool_
.
Erase
(
cache_name
);
}
}
}
// fill the pool
std
::
unordered_set
<
std
::
string
>
unlived_vars
;
for
(
auto
var
:
cfg_
->
LiveIn
(
op
))
{
if
(
cfg_
->
LiveOut
(
op
).
count
(
var
)
==
0
)
{
unlived_vars
.
emplace
(
var
);
}
}
for
(
auto
var
:
unlived_vars
)
{
ir
::
Node
*
var_node
=
cfg_
->
GetNodeByName
(
var
,
op
);
if
(
var_node
==
nullptr
||
var_node
->
IsCtrlVar
())
continue
;
if
(
NodeCanReused
(
var_node
)
&&
!
pool_
.
Has
(
var_node
))
{
pool_
.
Insert
(
var_node
);
}
...
...
@@ -273,8 +277,7 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var,
// redirect the input to the latest version of cache_var
for
(
auto
*
node
:
op
->
inputs
)
{
if
(
node
->
Name
()
==
var
)
{
ir
::
Node
*
cache_node
=
graph
->
CreateVarNode
(
var_desc
.
get
());
var_nodes_
[
cache_var
].
emplace_back
(
cache_node
);
ir
::
Node
*
cache_node
=
var_nodes_
[
cache_var
].
back
();
// swap node to cache_node
cache_node
->
outputs
.
insert
(
cache_node
->
outputs
.
end
(),
...
...
@@ -283,11 +286,15 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var,
auto
*
prev_op
=
node
->
inputs
[
0
];
std
::
replace
(
prev_op
->
outputs
.
begin
(),
prev_op
->
outputs
.
end
(),
node
,
cache_node
);
cache_node
->
inputs
.
emplace_back
(
prev_op
);
for
(
auto
*
next_op
:
node
->
outputs
)
{
std
::
replace
(
next_op
->
inputs
.
begin
(),
next_op
->
inputs
.
end
(),
node
,
cache_node
);
}
// erase unused node
auto
&
nodes
=
var_nodes_
.
at
(
var
);
nodes
.
erase
(
std
::
remove
(
nodes
.
begin
(),
nodes
.
end
(),
node
),
nodes
.
end
());
graph
->
RemoveNode
(
node
);
}
}
...
...
@@ -307,15 +314,14 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var,
std
::
replace
(
next_op
->
inputs
.
begin
(),
next_op
->
inputs
.
end
(),
node
,
cache_node
);
}
}
}
}
// release node of unused var in graph
for
(
auto
*
node
:
var_nodes_
[
var
])
{
// erase unused node
auto
&
nodes
=
var_nodes_
.
at
(
var
);
nodes
.
erase
(
std
::
remove
(
nodes
.
begin
(),
nodes
.
end
(),
node
),
nodes
.
end
());
graph
->
RemoveNode
(
node
);
}
var_nodes_
.
at
(
var
).
clear
();
}
}
}
}
// namespace details
...
...
paddle/fluid/framework/inplace_op_inference_test.cc
浏览文件 @
5998d3cc
...
...
@@ -179,11 +179,11 @@ TEST(InferInplace, SingleOpInplaceInToOut) {
op
->
SetOutput
(
"Out"
,
{
"test2_out"
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_a"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_a"
)
->
SetShape
({
32
,
64
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_a"
)
->
SetShape
({
32
,
64
,
128
,
128
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_b"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_c"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
)
->
SetShape
({
32
,
16
,
128
,
128
});
auto
&
infer_inplace
=
OpInfoMap
::
Instance
().
Get
(
op
->
Type
()).
infer_inplace_
;
auto
in_to_outs
=
infer_inplace
(
*
op
,
op
->
Block
());
...
...
@@ -201,11 +201,11 @@ TEST(InferInplace, SingleGradOpInplaceInToOut) {
op
->
SetOutput
(
GradVarName
(
"X"
),
{
"test2_a"
,
"test2_b"
,
"test2_c"
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_a"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_a"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_a"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_b"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_c"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
auto
&
infer_inplace
=
OpInfoMap
::
Instance
().
Get
(
op
->
Type
()).
infer_inplace_
;
auto
in_to_outs
=
infer_inplace
(
*
op
,
op
->
Block
());
...
...
@@ -233,12 +233,12 @@ TEST(InferInplace, MultiOutInplaceInToOut) {
prog
.
MutableBlock
(
0
)
->
Var
(
"o0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"y0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"z0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"a0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"b0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"c0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"o0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"y0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"z0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"a0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"b0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"c0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"o0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"y0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"z0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
auto
&
infer_inplace
=
OpInfoMap
::
Instance
().
Get
(
op
->
Type
()).
infer_inplace_
;
auto
in_to_outs
=
infer_inplace
(
*
op
,
op
->
Block
());
...
...
@@ -267,12 +267,12 @@ TEST(InferInplace, MultiGradInplaceInToOut) {
prog
.
MutableBlock
(
0
)
->
Var
(
"o0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"y0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"z0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"a0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"b0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"c0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"o0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"y0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"z0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"a0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"b0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"c0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"o0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"y0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"z0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
auto
&
infer_inplace
=
OpInfoMap
::
Instance
().
Get
(
op
->
Type
()).
infer_inplace_
;
auto
in_to_outs
=
infer_inplace
(
*
op
,
op
->
Block
());
...
...
paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc
浏览文件 @
5998d3cc
...
...
@@ -38,9 +38,13 @@ std::unique_ptr<ir::Graph> IdentityScaleOpCleanPass::ApplyImpl(
->
assert_is_op
(
"scale"
)
->
assert_op_attr
<
float
>
(
"scale"
,
1.
)
->
assert_op_attr
<
float
>
(
"bias"
,
0.
);
auto
scale_out
=
detector
.
mutable_pattern
()
auto
scale_out
=
detector
.
mutable_pattern
()
->
NewNode
(
"scale_out"
)
->
assert_is_op_output
(
"scale"
);
->
assert_is_op_output
(
"scale"
)
// scale's output var should have only one consumer, or it can't be
// removed.
->
assert_more
([](
Node
*
x
)
{
return
x
->
outputs
.
size
()
==
1UL
;
});
pre_op
->
LinksTo
({
scale_in
});
scale_op
->
LinksFrom
({
scale_in
}).
LinksTo
({
scale_out
});
...
...
paddle/fluid/imperative/layer.cc
浏览文件 @
5998d3cc
...
...
@@ -207,7 +207,7 @@ framework::LoDTensor& VarBase::GradValue() {
std
::
map
<
std
::
string
,
std
::
vector
<
VarBase
*>>
OpBase
::
ApplyGrad
()
{
if
(
grad_op_descs_
.
empty
()
&&
backward_id_
<=
0
)
{
LOG
(
WARNING
)
<<
"op with no grad: "
<<
op_desc_
->
Type
();
VLOG
(
3
)
<<
"op with no grad: "
<<
op_desc_
->
Type
();
return
{};
}
...
...
paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc
浏览文件 @
5998d3cc
...
...
@@ -460,77 +460,6 @@ inline bool CheckNodeIndegreeEquals(const Node &node, size_t n) {
return
node
.
inputs
.
size
()
==
n
;
}
// Builds the topologically sorted node sequence reachable from `source`.
//
// `source` must be non-empty and every node in it must have no inputs; both
// conditions are enforced. A node is appended to `sorted_` once all of its
// inputs have been marked as done; its outputs are then queued for a later
// sweep.
//
// NOTE(review): a node that Agent reports as deleted is marked done and
// removed from the pending set, but it still falls through to the readiness
// check below and may be appended to `sorted_` - confirm this is intended.
// NOTE(review): if a pending node has an input that never becomes done, the
// outer loop does not appear to terminate - confirm callers guarantee that
// every input is reachable from `source`.
NodesTSIterator::NodesTSIterator(const std::vector<Node *> &source) {
  PADDLE_ENFORCE(!source.empty(),
                 "Start points of topological sorting should not be empty!");
  // Every start point must have in-degree 0.
  for (auto *start : source) {
    PADDLE_ENFORCE(CheckNodeIndegreeEquals(*start, 0));
  }

  std::unordered_set<Node *> done;
  std::unordered_set<Node *> pending{source.begin(), source.end()};

  while (!pending.empty()) {
    // Sweep over a snapshot because `pending` is mutated while iterating.
    std::vector<Node *> snapshot(pending.begin(), pending.end());
    for (auto *cur : snapshot) {
      if (Agent(cur).deleted()) {
        done.insert(cur);
        pending.erase(cur);
      }

      // Ready only when every input has already been emitted.
      const bool ready =
          std::all_of(cur->inputs.begin(), cur->inputs.end(),
                      [&](Node *in) -> bool { return done.count(in) != 0; });
      if (!ready) {
        continue;
      }

      sorted_.push_back(cur);
      for (auto *succ : cur->outputs) {
        if (done.count(succ) == 0) {
          pending.insert(succ);
        }
      }

      pending.erase(cur);
      done.insert(cur);
    }
  }
}
// Copy constructor: duplicates other's sorted node list and cursor position.
NodesTSIterator::NodesTSIterator(const NodesTSIterator &other)
    : sorted_(other.sorted_), cursor_(other.cursor_) {}
// Dereference: returns a reference to the node at the current cursor.
// Enforces that the cursor lies inside the sorted sequence first.
Node &NodesTSIterator::operator*() {
  PADDLE_ENFORCE_LT(cursor_, sorted_.size());
  Node *current = sorted_[cursor_];
  return *current;
}
// Pre-increment: advances the cursor by one node. When the cursor runs past
// the last node, the sequence is cleared and the cursor rewound to 0.
NodesTSIterator &NodesTSIterator::operator++() {
  ++cursor_;
  if (cursor_ < sorted_.size()) {
    return *this;
  }
  // Exhausted: reset to the empty state.
  sorted_.clear();
  cursor_ = 0;
  return *this;
}
// Copy assignment: takes other's cursor first, then copies its sorted node
// list, and returns *this.
NodesTSIterator &NodesTSIterator::operator=(const NodesTSIterator &other) {
  cursor_ = other.cursor_;
  sorted_ = other.sorted_;
  return *this;
}
// Two iterators are equal when they hold the same node sequence and point at
// the same position in it.
bool NodesTSIterator::operator==(const NodesTSIterator &other) {
  if (!(sorted_ == other.sorted_)) {
    return false;
  }
  return cursor_ == other.cursor_;
}
// Member access: returns the node pointer at the current cursor.
// Enforces that the cursor lies inside the sorted sequence first.
Node *NodesTSIterator::operator->() {
  PADDLE_ENFORCE_LT(cursor_, sorted_.size());
  Node *current = sorted_[cursor_];
  return current;
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h
浏览文件 @
5998d3cc
...
...
@@ -30,6 +30,7 @@ namespace inference {
namespace
analysis
{
using
framework
::
ir
::
Graph
;
using
framework
::
ir
::
NodesTSIterator
;
const
char
kIsFunctionNode
[]
=
"__is_function_node__"
;
const
char
kFunctionNodeSubGraph
[]
=
"__function_node_sub_graph__"
;
...
...
@@ -132,32 +133,6 @@ struct Agent {
framework
::
ir
::
Node
*
x_
;
};
// Topological sorting iterator on nodes.
//
// Declared as a forward iterator over framework::ir::Node*. The node
// sequence is stored in `sorted_`; `cursor_` is the current index into it.
struct NodesTSIterator
    : public std::iterator<std::forward_iterator_tag, framework::ir::Node *> {
  // Default state: empty `sorted_` and `cursor_` == 0.
  NodesTSIterator() = default;
  // Constructs the iterator from the nodes in `source` (defined out of line).
  explicit NodesTSIterator(const std::vector<framework::ir::Node *> &source);
  // Move constructor: takes over other's node list, copies its cursor and
  // then resets other's cursor to 0.
  NodesTSIterator(NodesTSIterator &&other)
      : sorted_(std::move(other.sorted_)), cursor_(other.cursor_) {
    other.cursor_ = 0;
  }
  NodesTSIterator(const NodesTSIterator &other);

  framework::ir::Node &operator*();
  NodesTSIterator &operator++();
  // TODO(Superjomn) the current implementation only compares the first
  // element; it needs to compare the graph and all the elements in the queue
  // and set.
  // NOTE(review): this TODO reads as if it concerns operator== rather than
  // operator= - confirm and move it if so.
  NodesTSIterator &operator=(const NodesTSIterator &other);
  bool operator==(const NodesTSIterator &other);
  // Logical negation of operator==.
  bool operator!=(const NodesTSIterator &other) { return !(*this == other); }
  framework::ir::Node *operator->();

 private:
  std::vector<framework::ir::Node *> sorted_;  // node sequence being iterated
  size_t cursor_{0};                           // current index into sorted_
};
// Nodes that have no inputs are treated as start points.
static
std
::
vector
<
framework
::
ir
::
Node
*>
ExtractStartPoints
(
const
Graph
&
g
)
{
std
::
vector
<
framework
::
ir
::
Node
*>
result
;
...
...
paddle/fluid/operators/controlflow/compare_op.cc
浏览文件 @
5998d3cc
...
...
@@ -51,6 +51,11 @@ class CompareOpProtoMaker : public framework::OpProtoAndCheckerMaker {
comment
.
type
));
AddInput
(
"Y"
,
string
::
Sprintf
(
"the right hand operand of %s operator"
,
comment
.
type
));
AddAttr
<
int
>
(
"axis"
,
"The start dimension index for broadcasting Y onto X. [default -1]"
)
.
SetDefault
(
-
1
)
.
EqualGreaterThan
(
-
1
);
AddAttr
<
bool
>
(
"force_cpu"
,
"Force fill output variable to cpu "
"memory. Otherwise, fill output variable to the running "
...
...
@@ -64,11 +69,6 @@ N-dim tensor. X and Y could be any type. The each element of the Out tensor is
calculated by $%s$
)DOC"
,
comment
.
equation
));
AddAttr
<
int
>
(
"axis"
,
"The start dimension index for broadcasting Y onto X. [default -1]"
)
.
SetDefault
(
-
1
)
.
EqualGreaterThan
(
-
1
);
}
};
...
...
paddle/fluid/operators/detection/density_prior_box_op.h
浏览文件 @
5998d3cc
...
...
@@ -72,7 +72,7 @@ class DensityPriorBoxOpKernel : public framework::OpKernel<T> {
#ifdef PADDLE_WITH_MKLML
#pragma omp parallel for
#endif
for
(
in
t
i
=
0
;
i
<
fixed_ratios
.
size
();
i
++
)
{
for
(
size_
t
i
=
0
;
i
<
fixed_ratios
.
size
();
i
++
)
{
sqrt_fixed_ratios
.
push_back
(
sqrt
(
fixed_ratios
[
i
]));
}
...
...
@@ -115,11 +115,10 @@ class DensityPriorBoxOpKernel : public framework::OpKernel<T> {
}
}
if
(
clip
)
{
platform
::
Transform
<
platform
::
CPUDeviceContext
>
trans
;
ClipFunctor
<
T
>
clip_func
;
trans
(
ctx
.
template
device_context
<
platform
::
CPUDeviceContext
>(),
boxes
->
data
<
T
>
(),
boxes
->
data
<
T
>
()
+
boxes
->
numel
(),
boxes
->
data
<
T
>
(),
clip_func
);
T
*
dt
=
boxes
->
data
<
T
>
();
std
::
transform
(
dt
,
dt
+
boxes
->
numel
(),
dt
,
[](
T
v
)
->
T
{
return
std
::
min
<
T
>
(
std
::
max
<
T
>
(
v
,
0.
),
1.
);
});
}
framework
::
Tensor
var_t
;
var_t
.
mutable_data
<
T
>
(
...
...
@@ -141,7 +140,7 @@ class DensityPriorBoxOpKernel : public framework::OpKernel<T> {
#pragma omp parallel for collapse(2)
#endif
for
(
int
i
=
0
;
i
<
box_num
;
++
i
)
{
for
(
in
t
j
=
0
;
j
<
variances
.
size
();
++
j
)
{
for
(
size_
t
j
=
0
;
j
<
variances
.
size
();
++
j
)
{
e_vars
(
i
,
j
)
=
variances
[
j
];
}
}
...
...
paddle/fluid/operators/detection/prior_box_op.h
浏览文件 @
5998d3cc
...
...
@@ -46,13 +46,6 @@ inline void ExpandAspectRatios(const std::vector<float>& input_aspect_ratior,
}
}
// Unary functor that clamps its argument to the range [0, 1].
template <typename T>
struct ClipFunctor {
  // Returns `in` raised to at least 0 and then capped at 1.
  HOSTDEVICE inline T operator()(T in) const {
    const T floored = std::max<T>(in, 0.);
    return std::min<T>(floored, 1.);
  }
};
template
<
typename
T
>
class
PriorBoxOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
...
...
@@ -101,31 +94,30 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
boxes
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
vars
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
e_boxes
=
framework
::
EigenTensor
<
T
,
4
>::
From
(
*
boxes
);
T
*
b_t
=
boxes
->
data
<
T
>
(
);
for
(
int
h
=
0
;
h
<
feature_height
;
++
h
)
{
for
(
int
w
=
0
;
w
<
feature_width
;
++
w
)
{
T
center_x
=
(
w
+
offset
)
*
step_width
;
T
center_y
=
(
h
+
offset
)
*
step_height
;
T
box_width
,
box_height
;
int
idx
=
0
;
for
(
size_t
s
=
0
;
s
<
min_sizes
.
size
();
++
s
)
{
auto
min_size
=
min_sizes
[
s
];
if
(
min_max_aspect_ratios_order
)
{
box_width
=
box_height
=
min_size
/
2.
;
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
idx
++
;
b_t
[
0
]
=
(
center_x
-
box_width
)
/
img_width
;
b_t
[
1
]
=
(
center_y
-
box_height
)
/
img_height
;
b_t
[
2
]
=
(
center_x
+
box_width
)
/
img_width
;
b_t
[
3
]
=
(
center_y
+
box_height
)
/
img_height
;
b_t
+=
4
;
if
(
max_sizes
.
size
()
>
0
)
{
auto
max_size
=
max_sizes
[
s
];
// square prior with size sqrt(minSize * maxSize)
box_width
=
box_height
=
sqrt
(
min_size
*
max_size
)
/
2.
;
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
idx
++
;
b_t
[
0
]
=
(
center_x
-
box_width
)
/
img_width
;
b_t
[
1
]
=
(
center_y
-
box_height
)
/
img_height
;
b_t
[
2
]
=
(
center_x
+
box_width
)
/
img_width
;
b_t
[
3
]
=
(
center_y
+
box_height
)
/
img_height
;
b_t
+=
4
;
}
// priors with different aspect ratios
for
(
size_t
r
=
0
;
r
<
aspect_ratios
.
size
();
++
r
)
{
...
...
@@ -135,11 +127,11 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
}
box_width
=
min_size
*
sqrt
(
ar
)
/
2.
;
box_height
=
min_size
/
sqrt
(
ar
)
/
2.
;
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
idx
++
;
b_t
[
0
]
=
(
center_x
-
box_width
)
/
img_width
;
b_t
[
1
]
=
(
center_y
-
box_height
)
/
img_height
;
b_t
[
2
]
=
(
center_x
+
box_width
)
/
img_width
;
b_t
[
3
]
=
(
center_y
+
box_height
)
/
img_height
;
b_t
+=
4
;
}
}
else
{
// priors with different aspect ratios
...
...
@@ -147,21 +139,21 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
float
ar
=
aspect_ratios
[
r
];
box_width
=
min_size
*
sqrt
(
ar
)
/
2.
;
box_height
=
min_size
/
sqrt
(
ar
)
/
2.
;
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
idx
++
;
b_t
[
0
]
=
(
center_x
-
box_width
)
/
img_width
;
b_t
[
1
]
=
(
center_y
-
box_height
)
/
img_height
;
b_t
[
2
]
=
(
center_x
+
box_width
)
/
img_width
;
b_t
[
3
]
=
(
center_y
+
box_height
)
/
img_height
;
b_t
+=
4
;
}
if
(
max_sizes
.
size
()
>
0
)
{
auto
max_size
=
max_sizes
[
s
];
// square prior with size sqrt(minSize * maxSize)
box_width
=
box_height
=
sqrt
(
min_size
*
max_size
)
/
2.
;
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
idx
++
;
b_t
[
0
]
=
(
center_x
-
box_width
)
/
img_width
;
b_t
[
1
]
=
(
center_y
-
box_height
)
/
img_height
;
b_t
[
2
]
=
(
center_x
+
box_width
)
/
img_width
;
b_t
[
3
]
=
(
center_y
+
box_height
)
/
img_height
;
b_t
+=
4
;
}
}
}
...
...
@@ -169,11 +161,10 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
}
if
(
clip
)
{
platform
::
Transform
<
platform
::
CPUDeviceContext
>
trans
;
ClipFunctor
<
T
>
clip_func
;
trans
(
ctx
.
template
device_context
<
platform
::
CPUDeviceContext
>(),
boxes
->
data
<
T
>
(),
boxes
->
data
<
T
>
()
+
boxes
->
numel
(),
boxes
->
data
<
T
>
(),
clip_func
);
T
*
dt
=
boxes
->
data
<
T
>
();
std
::
transform
(
dt
,
dt
+
boxes
->
numel
(),
dt
,
[](
T
v
)
->
T
{
return
std
::
min
<
T
>
(
std
::
max
<
T
>
(
v
,
0.
),
1.
);
});
}
framework
::
Tensor
var_t
;
...
...
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
浏览文件 @
5998d3cc
...
...
@@ -21,6 +21,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/operators/jit/kernels.h"
#include "paddle/fluid/operators/math/blas.h"
namespace
paddle
{
...
...
@@ -37,32 +38,24 @@ struct EmbeddingVSumFunctor {
const
LoDTensor
*
table_t
,
const
LoDTensor
*
ids_t
,
LoDTensor
*
output_t
)
{
auto
*
table
=
table_t
->
data
<
T
>
();
int64_t
row_number
=
table_t
->
dims
()[
0
];
int64_t
row
_width
=
table_t
->
dims
()[
1
];
int64_t
last_dim
=
output_t
->
dims
()[
1
];
int64_t
table_height
=
table_t
->
dims
()[
0
];
int64_t
table
_width
=
table_t
->
dims
()[
1
];
int64_t
out_width
=
output_t
->
dims
()[
1
];
const
int64_t
*
ids
=
ids_t
->
data
<
int64_t
>
();
auto
ids_lod
=
ids_t
->
lod
()[
0
];
int64_t
ids_count
=
ids_t
->
numel
()
/
ids_lod
.
back
();
int64_t
idx_width
=
ids_t
->
numel
()
/
ids_lod
.
back
();
auto
*
output
=
output_t
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
blas
=
math
::
GetBlas
<
platform
::
CPUDeviceContext
,
T
>
(
context
);
for
(
int64_t
i
=
0
;
i
!=
ids_lod
.
size
()
-
1
;
++
i
)
{
size_t
begin
=
ids_lod
[
i
]
*
ids_count
;
for
(
int64_t
j
=
0
;
j
!=
ids_count
;
++
j
)
{
PADDLE_ENFORCE_LT
(
ids
[
begin
],
row_number
);
PADDLE_ENFORCE_GE
(
ids
[
begin
],
0
,
"ids %d"
,
i
);
blas
.
VCOPY
(
row_width
,
table
+
ids
[
begin
+
j
]
*
row_width
,
output
+
i
*
last_dim
+
j
*
row_width
);
}
PADDLE_ENFORCE_LE
(
table_width
*
idx_width
,
out_width
);
for
(
int64_t
r
=
(
ids_lod
[
i
]
+
1
)
*
ids_count
;
r
<
ids_lod
[
i
+
1
]
*
ids_count
;
++
r
)
{
PADDLE_ENFORCE_LT
(
ids
[
r
],
row_number
);
PADDLE_ENFORCE_GE
(
ids
[
r
],
0
,
"ids %d"
,
i
);
blas
.
AXPY
(
row_width
,
1.
,
table
+
ids
[
r
]
*
row_width
,
output
+
i
*
last_dim
+
(
r
%
ids_count
)
*
row_width
);
}
jit
::
emb_seq_pool_attr_t
attr
(
table_height
,
table_width
,
0
,
idx_width
,
out_width
,
jit
::
SeqPoolType
::
kSum
);
for
(
int64_t
i
=
0
;
i
!=
ids_lod
.
size
()
-
1
;
++
i
)
{
attr
.
index_height
=
ids_lod
[
i
+
1
]
-
ids_lod
[
i
];
auto
emb_seqpool
=
jit
::
Get
<
jit
::
kEmbSeqPool
,
jit
::
EmbSeqPoolTuples
<
T
>
,
platform
::
CPUPlace
>
(
attr
);
emb_seqpool
(
table
,
ids
+
ids_lod
[
i
]
*
idx_width
,
output
+
i
*
out_width
,
&
attr
);
}
}
};
...
...
paddle/fluid/operators/group_norm_op.cc
浏览文件 @
5998d3cc
...
...
@@ -170,13 +170,48 @@ class GroupNormGradMaker : public framework::SingleGradOpDescMaker {
}
};
// InplaceInToOut specialisation for group_norm: pairs input slot "X" with
// output slot "Y".
class GroupNormInplaceInToOut : public framework::InplaceInToOut {
 public:
  using InplaceInToOut::InplaceInToOut;

 protected:
  // Returns the single pair {"X" -> "Y"}. Neither `op_desc` nor `block` is
  // consulted.
  std::unordered_map<std::string, std::string> Apply(
      const framework::OpDesc &op_desc,
      framework::BlockDesc *block) const override {
    std::unordered_map<std::string, std::string> in_to_out;
    in_to_out.emplace("X", "Y");
    return in_to_out;
  }
};
// InplaceInToOut specialisation for group_norm_grad: pairs the gradient
// variable name of "Y" with the gradient variable name of "X".
class GroupNormGradInplaceInToOut : public framework::InplaceInToOut {
 public:
  using InplaceInToOut::InplaceInToOut;

 protected:
  // Returns the single pair {GradVarName("Y") -> GradVarName("X")}. Neither
  // `op_desc` nor `block` is consulted.
  std::unordered_map<std::string, std::string> Apply(
      const framework::OpDesc &op_desc,
      framework::BlockDesc *block) const override {
    std::unordered_map<std::string, std::string> in_to_out;
    in_to_out.emplace(framework::GradVarName("Y"),
                      framework::GradVarName("X"));
    return in_to_out;
  }
};
// Var-type inference for group_norm: reports that output "Y" shares its
// type with input "X".
class GroupNormOpInferVarType
    : public framework::PassInDtypeAndVarTypeToOutput {
 protected:
  // Returns the single input->output pair {"X" -> "Y"}.
  std::unordered_map<std::string, std::string> GetInputOutputWithSameType()
      const override {
    std::unordered_map<std::string, std::string> same_type;
    same_type.emplace("X", /*->*/ "Y");
    return same_type;
  }
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
group_norm
,
ops
::
GroupNormOp
,
ops
::
GroupNormOpMaker
,
ops
::
GroupNormGradMaker
);
REGISTER_OPERATOR
(
group_norm_grad
,
ops
::
GroupNormGradOp
);
ops
::
GroupNormOpInferVarType
,
ops
::
GroupNormGradMaker
,
ops
::
GroupNormInplaceInToOut
);
REGISTER_OPERATOR
(
group_norm_grad
,
ops
::
GroupNormGradOp
,
ops
::
GroupNormGradInplaceInToOut
);
REGISTER_OP_CPU_KERNEL
(
group_norm
,
ops
::
GroupNormKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
GroupNormKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
...
...
paddle/fluid/operators/jit/benchmark.cc
浏览文件 @
5998d3cc
...
...
@@ -301,6 +301,37 @@ void BenchSeqPoolKernel() {
}
}
template
<
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
BenchEmbSeqPoolKernel
()
{
std
::
vector
<
jit
::
SeqPoolType
>
pool_types
=
{
jit
::
SeqPoolType
::
kSum
};
int64_t
tbl_h
=
1e4
;
for
(
int
tbl_w
:
{
10
,
16
,
256
})
{
Tensor
table
;
table
.
Resize
({
tbl_h
,
tbl_w
});
RandomVec
<
T
>
(
tbl_h
*
tbl_w
,
table
.
mutable_data
<
T
>
(
PlaceType
()),
-
2.
f
,
2.
f
);
const
T
*
table_data
=
table
.
data
<
T
>
();
for
(
auto
type
:
pool_types
)
{
for
(
int
idx_w
:
{
1
,
2
,
10
,
16
})
{
for
(
int
idx_h
:
{
1
,
2
,
9
,
13
,
16
})
{
int64_t
out_w
=
tbl_w
*
idx_w
;
jit
::
emb_seq_pool_attr_t
attr
(
tbl_h
,
tbl_w
,
idx_h
,
idx_w
,
out_w
,
type
);
Tensor
idx
,
out
;
idx
.
Resize
({
idx_h
,
idx_w
});
out
.
Resize
({
out_w
});
RandomVec
<
int64_t
>
(
idx_h
*
idx_w
,
idx
.
mutable_data
<
int64_t
>
(
PlaceType
()),
0
,
tbl_h
-
1
);
const
int64_t
*
idx_data
=
idx
.
data
<
int64_t
>
();
T
*
o_data
=
out
.
mutable_data
<
T
>
(
PlaceType
());
BenchAllImpls
<
KT
,
jit
::
EmbSeqPoolTuples
<
T
>
,
PlaceType
>
(
attr
,
table_data
,
idx_data
,
o_data
,
&
attr
);
}
}
}
}
}
template
<
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
BenchMatMulKernel
()
{
for
(
int
m
:
{
1
,
2
,
3
,
4
})
{
...
...
@@ -339,6 +370,71 @@ void BenchSoftmaxKernel() {
}
}
// Benchmarks all implementations of the layer-norm kernel `KT`.
//
// Input and output tensors have shape {n, x_dim_0, x_dim_1}; mean and var
// have shape {n, x_dim_0}; scale and bias have shape {x_dim_1}. The sweep
// covers n in {1, 2, 10}, x_dim_0 in {1, 9, 17, 50} and x_dim_1 in
// TestSizes().
template <jit::KernelType KT, typename T, typename PlaceType>
void BenchLayerNormKernel() {
  const T epsilon = 9.99999975e-06;
  for (int n : {1, 2, 10}) {
    for (int x_dim_0 : {1, 9, 17, 50}) {
      int rows = n * x_dim_0;
      for (int x_dim_1 : TestSizes()) {
        int cols = x_dim_1;
        int numel = rows * cols;

        Tensor x, mean, var, scale, bias, out;
        x.Resize({n, x_dim_0, x_dim_1});
        out.Resize({n, x_dim_0, x_dim_1});
        mean.Resize({n, x_dim_0});
        var.Resize({n, x_dim_0});
        scale.Resize({x_dim_1});
        bias.Resize({x_dim_1});

        // Allocate and fill every input buffer; the fill order is kept as in
        // the original.
        RandomVec<T>(numel, x.mutable_data<T>(PlaceType()), -2.f, 2.f);
        RandomVec<T>(rows, mean.mutable_data<T>(PlaceType()), -2.f, 2.f);
        RandomVec<T>(rows, var.mutable_data<T>(PlaceType()), -2.f, 2.f);
        RandomVec<T>(cols, scale.mutable_data<T>(PlaceType()), -2.f, 2.f);
        RandomVec<T>(cols, bias.mutable_data<T>(PlaceType()), -2.f, 2.f);

        const T* scale_data = scale.data<T>();
        const T* bias_data = bias.data<T>();
        T* x_data = x.data<T>();
        T* mean_data = mean.data<T>();
        T* var_data = var.data<T>();
        T* out_data = out.mutable_data<T>(PlaceType());

        // The first argument is the kernel attribute; the rest are forwarded
        // to each implementation.
        BenchAllImpls<KT, jit::LayerNormTuples<T>, PlaceType>(
            cols, x_data, out_data, mean_data, var_data, scale_data, bias_data,
            rows, epsilon, cols);
      }
    }
  }
}
// Benchmarks all implementations of the CRF decoding kernel `KT`.
//
// x, alpha and track have shape {seq_len, tag_num}; w has shape
// {tag_num + 2, tag_num}. The sweep covers seq_len in {1, 11, 17, 50} and
// tag_num in TestSizes().
template <jit::KernelType KT, typename T, typename PlaceType>
void BenchCRFDecodingKernel() {
  constexpr int state_trans_base_idx = 2;
  for (int seq_len : {1, 11, 17, 50}) {
    for (int tag_num : TestSizes()) {
      int emission_numel = seq_len * tag_num;
      int transition_numel = (tag_num + state_trans_base_idx) * tag_num;

      Tensor x, w, alpha, track;
      x.Resize({seq_len, tag_num});
      w.Resize({tag_num + state_trans_base_idx, tag_num});
      alpha.Resize({seq_len, tag_num});
      track.Resize({seq_len, tag_num});

      // Only the two inputs are randomised; alpha and track are just
      // allocated.
      RandomVec<T>(emission_numel, x.mutable_data<T>(PlaceType()), -2.f, 2.f);
      RandomVec<T>(transition_numel, w.mutable_data<T>(PlaceType()), -2.f,
                   2.f);

      const T* x_data = x.data<T>();
      const T* w_data = w.data<T>();
      T* alpha_data = alpha.mutable_data<T>(PlaceType());
      int* track_data = track.mutable_data<int>(PlaceType());

      BenchAllImpls<KT, jit::CRFDecodingTuples<T>, PlaceType>(
          tag_num, seq_len, x_data, w_data, alpha_data, track_data, tag_num);
    }
  }
}
using
T
=
float
;
using
CPUPlace
=
paddle
::
platform
::
CPUPlace
;
...
...
@@ -376,12 +472,27 @@ BENCH_FP32_CPU(kGRUHtPart2) { BenchGRUKernel<jit::kGRUHtPart2, T, CPUPlace>(); }
// Each BENCH_FP32_CPU(<kernel>) body below forwards to the matching
// Bench*Kernel template, instantiated with jit::<kernel>, T and CPUPlace.

// seq pool function
BENCH_FP32_CPU(kSeqPool) { BenchSeqPoolKernel<jit::kSeqPool, T, CPUPlace>(); }

// embedding seq pool function
BENCH_FP32_CPU(kEmbSeqPool) {
  BenchEmbSeqPoolKernel<jit::kEmbSeqPool, T, CPUPlace>();
}

// matmul
BENCH_FP32_CPU(kMatMul) { BenchMatMulKernel<jit::kMatMul, T, CPUPlace>(); }

// softmax
BENCH_FP32_CPU(kSoftmax) { BenchSoftmaxKernel<jit::kSoftmax, T, CPUPlace>(); }

// layernorm
BENCH_FP32_CPU(kLayerNorm) {
  BenchLayerNormKernel<jit::kLayerNorm, T, CPUPlace>();
}

// crfdecoding
BENCH_FP32_CPU(kCRFDecoding) {
  BenchCRFDecodingKernel<jit::kCRFDecoding, T, CPUPlace>();
}
// Benchmark all jit kernels including jitcode, mkl and refer.
// To use this tool, run command: ./benchmark [options...]
// Options:
...
...
paddle/fluid/operators/jit/gen/CMakeLists.txt
浏览文件 @
5998d3cc
...
...
@@ -31,3 +31,4 @@ USE_JITKERNEL_GEN(kNCHW16CMulNC)
USE_JITKERNEL_GEN
(
kSeqPool
)
USE_JITKERNEL_GEN
(
kHMax
)
USE_JITKERNEL_GEN
(
kHSum
)
USE_JITKERNEL_GEN
(
kEmbSeqPool
)
paddle/fluid/operators/jit/gen/embseqpool.cc
0 → 100644
浏览文件 @
5998d3cc
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. */
#include "paddle/fluid/operators/jit/gen/embseqpool.h"
#include <stddef.h> // offsetof
#include <vector>
#include "paddle/fluid/operators/jit/gen/act.h" // for exp_float_consts ones
#include "paddle/fluid/operators/jit/registry.h"
#include "paddle/fluid/platform/cpu_info.h"
namespace
paddle
{
namespace
operators
{
namespace
jit
{
namespace
gen
{
void
EmbSeqPoolJitCode
::
genCode
()
{
preCode
();
constexpr
int
block
=
YMM_FLOAT_BLOCK
;
constexpr
int
max_num_regs
=
8
;
const
int
num_block
=
tbl_w_
/
block
;
const
int
num_groups
=
num_block
/
max_num_regs
;
const
size_t
block_size
=
sizeof
(
float
)
*
block
;
std
::
vector
<
int
>
groups
(
num_groups
,
max_num_regs
);
int
rest_num_regs
=
num_block
%
max_num_regs
;
if
(
rest_num_regs
>
0
)
{
groups
.
push_back
(
rest_num_regs
);
}
// protect param_dst
mov
(
reg_ptr_param_dst
,
param_dst
);
mov
(
reg_idx_width_in_byte
,
qword
[
param_attr
+
offsetof
(
emb_seq_pool_attr_t
,
index_width
)]);
mov
(
reg_idx_height
,
qword
[
param_attr
+
offsetof
(
emb_seq_pool_attr_t
,
index_height
)]);
mov
(
rax
,
sizeof
(
int64_t
));
mul
(
reg_idx_width_in_byte
);
mov
(
reg_idx_width_in_byte
,
rax
);
const
size_t
tbl_width_in_byte
=
sizeof
(
float
)
*
tbl_w_
;
int
acc_num_regs
=
0
;
for
(
int
num_regs
:
groups
)
{
Label
l_next_idx_w
,
l_next_idx_h
,
l_save_now
;
xor_
(
reg_idx_w_i_in_byte
,
reg_idx_w_i_in_byte
);
mov
(
reg_ptr_dst_i
,
reg_ptr_param_dst
);
add
(
reg_ptr_dst_i
,
acc_num_regs
*
block_size
);
L
(
l_next_idx_w
);
{
// h == 0
mov
(
reg_ptr_idx_i
,
param_idx
);
add
(
reg_ptr_idx_i
,
reg_idx_w_i_in_byte
);
mov
(
reg_idx
,
qword
[
reg_ptr_idx_i
]);
mov
(
rax
,
tbl_width_in_byte
);
mul
(
reg_idx
);
mov
(
reg_ptr_tbl_i
,
rax
);
// reg is offset now
add
(
reg_ptr_tbl_i
,
param_tbl
);
// reg is ptr_i now
size_t
w_offset
=
0
;
for
(
int
reg_i
=
0
;
reg_i
<
num_regs
;
++
reg_i
)
{
vmovups
(
ymm_t
(
reg_i
+
num_regs
),
ptr
[
reg_ptr_tbl_i
+
w_offset
]);
w_offset
+=
block_size
;
}
add
(
reg_ptr_idx_i
,
reg_idx_width_in_byte
);
// end condition of idx h
mov
(
reg_idx_h_end
,
reg_idx_height
);
mov
(
rax
,
reg_idx_width_in_byte
);
mul
(
reg_idx_h_end
);
mov
(
reg_idx_h_end
,
rax
);
add
(
reg_idx_h_end
,
reg_idx_w_i_in_byte
);
add
(
reg_idx_h_end
,
param_idx
);
cmp
(
reg_ptr_idx_i
,
reg_idx_h_end
);
jge
(
l_save_now
,
T_NEAR
);
L
(
l_next_idx_h
);
{
mov
(
reg_idx
,
qword
[
reg_ptr_idx_i
]);
mov
(
reg_ptr_tbl_i
,
reg_idx
);
mov
(
rax
,
tbl_width_in_byte
);
mul
(
reg_idx
);
mov
(
reg_ptr_tbl_i
,
rax
);
add
(
reg_ptr_tbl_i
,
param_tbl
);
size_t
w_offset
=
0
;
for
(
int
reg_i
=
0
;
reg_i
<
num_regs
;
++
reg_i
)
{
vmovups
(
ymm_t
(
reg_i
),
ptr
[
reg_ptr_tbl_i
+
w_offset
]);
vaddps
(
ymm_t
(
reg_i
+
num_regs
),
ymm_t
(
reg_i
+
num_regs
),
ymm_t
(
reg_i
));
w_offset
+=
block_size
;
}
add
(
reg_ptr_idx_i
,
reg_idx_width_in_byte
);
cmp
(
reg_ptr_idx_i
,
reg_idx_h_end
);
jl
(
l_next_idx_h
,
T_NEAR
);
}
// end of idx h
L
(
l_save_now
);
// avg or sqrt here, if needed
w_offset
=
0
;
for
(
int
reg_i
=
0
;
reg_i
<
num_regs
;
++
reg_i
)
{
vmovups
(
ptr
[
reg_ptr_dst_i
+
w_offset
],
ymm_t
(
reg_i
+
num_regs
));
w_offset
+=
block_size
;
}
add
(
reg_ptr_dst_i
,
tbl_width_in_byte
);
add
(
reg_idx_w_i_in_byte
,
sizeof
(
int64_t
));
cmp
(
reg_idx_w_i_in_byte
,
reg_idx_width_in_byte
);
jl
(
l_next_idx_w
,
T_NEAR
);
}
// end of idx w
acc_num_regs
+=
num_regs
;
add
(
param_tbl
,
num_regs
*
block_size
);
// do not use acc_num_regs
}
// end of groups
postCode
();
}
// Creator that decides when the JIT embedding seq-pool kernel is usable and
// builds it.
class EmbSeqPoolCreator : public JitCodeCreator<emb_seq_pool_attr_t> {
 public:
  // Usable only when AVX may be used and the table width is a whole number
  // of YMM float blocks.
  bool UseMe(const emb_seq_pool_attr_t& attr) const override {
    if (!platform::MayIUse(platform::avx)) {
      return false;
    }
    return attr.table_width % YMM_FLOAT_BLOCK == 0;
  }

  // Code buffer size: a fixed 96 plus 96 * 8 per YMM block of the table
  // width.
  size_t CodeSize(const emb_seq_pool_attr_t& attr) const override {
    const auto num_blocks = attr.table_width / YMM_FLOAT_BLOCK;
    return 96 + num_blocks * 96 * 8;
  }

  // Enforces that every dimension in `attr` is positive, then generates the
  // code.
  std::unique_ptr<GenBase> CreateJitCode(
      const emb_seq_pool_attr_t& attr) const override {
    PADDLE_ENFORCE_GT(attr.table_height, 0);
    PADDLE_ENFORCE_GT(attr.table_width, 0);
    PADDLE_ENFORCE_GT(attr.index_height, 0);
    PADDLE_ENFORCE_GT(attr.index_width, 0);
    PADDLE_ENFORCE_GT(attr.out_width, 0);
    const size_t code_size = CodeSize(attr);
    return make_unique<EmbSeqPoolJitCode>(attr, code_size);
  }
};
}
// namespace gen
}
// namespace jit
}
// namespace operators
}
// namespace paddle
namespace
gen
=
paddle
::
operators
::
jit
::
gen
;
REGISTER_JITKERNEL_GEN
(
kEmbSeqPool
,
gen
::
EmbSeqPoolCreator
);
paddle/fluid/operators/jit/gen/embseqpool.h
0 → 100644
浏览文件 @
5998d3cc
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. */
#pragma once
#include <string>
#include "glog/logging.h"
#include "paddle/fluid/operators/jit/gen/jitcode.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
operators
{
namespace
jit
{
namespace
gen
{
// JIT code generator for the fused embedding lookup + sequence pooling
// kernel. Only the sum pool type is accepted; the code is generated at
// construction time.
class EmbSeqPoolJitCode : public JitCode {
 public:
  // attr:      kernel attributes; table_width and pool_type are read here.
  // code_size: size passed to the JitCode base (default 256 * 1024).
  // code_ptr:  optional buffer pointer passed to the JitCode base.
  explicit EmbSeqPoolJitCode(const emb_seq_pool_attr_t& attr,
                             size_t code_size = 256 * 1024,
                             void* code_ptr = nullptr)
      : JitCode(code_size, code_ptr),
        tbl_w_(attr.table_width),
        type_(attr.pool_type) {
    const bool is_sum_pool = (type_ == SeqPoolType::kSum);
    if (!is_sum_pool) {
      LOG(FATAL) << "Only support sum pool yet ";
    }
    genCode();
  }

  // Name of the form "EmbSeqPoolJitCode[_Sum|_Avg|_Sqrt]_W<table width>".
  std::string name() const override {
    const char* pool_suffix = "";
    switch (type_) {
      case SeqPoolType::kSum:
        pool_suffix = "_Sum";
        break;
      case SeqPoolType::kAvg:
        pool_suffix = "_Avg";
        break;
      case SeqPoolType::kSqrt:
        pool_suffix = "_Sqrt";
        break;
      default:
        break;
    }
    std::string full_name = "EmbSeqPoolJitCode";
    full_name += pool_suffix;
    full_name += ("_W" + std::to_string(tbl_w_));
    return full_name;
  }

  void genCode() override;

 private:
  int tbl_w_;         // table width taken from the attribute
  SeqPoolType type_;  // pool type taken from the attribute

  // Incoming kernel arguments, in ABI parameter order.
  reg64_t param_tbl{abi_param1};
  reg64_t param_idx{abi_param2};
  reg64_t param_dst{abi_param3};
  reg64_t param_attr{abi_param4};
  reg64_t reg_tmp{rax};

  // Working registers used by genCode().
  reg64_t reg_idx_width_in_byte{r8};
  reg64_t reg_idx_height{r9};
  reg64_t reg_ptr_tbl_i{r10};
  reg64_t reg_idx{r10};  // deliberately shares r10 with reg_ptr_tbl_i
  reg64_t reg_ptr_idx_i{r11};
  reg64_t reg_ptr_dst_i{r12};
  reg64_t reg_ptr_param_dst{r13};  // rdx is used in mul so protect param_dst
  reg64_t reg_idx_w_i_in_byte{r14};
  reg64_t reg_idx_h_end{r15};
};
}
// namespace gen
}
// namespace jit
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/jit/gen/seqpool.h
浏览文件 @
5998d3cc
...
...
@@ -32,7 +32,7 @@ class SeqPoolJitCode : public JitCode {
:
JitCode
(
code_size
,
code_ptr
),
w_
(
attr
.
w
),
type_
(
attr
.
type
)
{
if
(
!
(
type_
==
SeqPoolType
::
kSum
||
type_
==
SeqPoolType
::
kAvg
||
type_
==
SeqPoolType
::
kSqrt
))
{
LOG
(
FATAL
)
<<
"Only support
sum pool yet
"
;
LOG
(
FATAL
)
<<
"Only support
ed pool type: sum, avg and sqrt.
"
;
}
fp_h_
[
0
]
=
1.
f
;
this
->
genCode
();
...
...
paddle/fluid/operators/jit/helper.cc
浏览文件 @
5998d3cc
...
...
@@ -54,6 +54,7 @@ const char* to_string(KernelType kt) {
ONE_CASE
(
kHMax
);
ONE_CASE
(
kHSum
);
ONE_CASE
(
kSoftmax
);
ONE_CASE
(
kEmbSeqPool
);
default:
PADDLE_THROW
(
"Not support type: %d, or forget to add it."
,
kt
);
return
"NOT JITKernel"
;
...
...
paddle/fluid/operators/jit/helper.h
浏览文件 @
5998d3cc
...
...
@@ -172,6 +172,15 @@ inline std::ostream& operator<<(std::ostream& os, const seq_pool_attr_t& attr) {
return
os
;
}
// Streams every field of an emb_seq_pool_attr_t as
// "table_height[..],table_width[..],index_height[..],index_width[..],"
// "output_width[..],pool_type[..]".
inline std::ostream& operator<<(std::ostream& os,
                                const emb_seq_pool_attr_t& attr) {
  os << "table_height[" << attr.table_height;
  os << "],table_width[" << attr.table_width;
  os << "],index_height[" << attr.index_height;
  os << "],index_width[" << attr.index_width;
  os << "],output_width[" << attr.out_width;
  os << "],pool_type[" << to_string(attr.pool_type);
  return os << "]";
}
inline
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
matmul_attr_t
&
attr
)
{
os
<<
"M["
<<
attr
.
m
<<
"],N["
<<
attr
.
n
<<
"],K["
<<
attr
.
k
<<
"]"
;
return
os
;
...
...
paddle/fluid/operators/jit/kernel_base.h
浏览文件 @
5998d3cc
...
...
@@ -13,6 +13,7 @@
* limitations under the License. */
#pragma once
#include <cstdint>
#include "paddle/fluid/operators/jit/macro.h"
#include "paddle/fluid/platform/macros.h"
...
...
@@ -20,34 +21,35 @@ namespace paddle {
namespace
operators
{
namespace
jit
{
// TODO(TJ): reorder by alphabet
typedef
enum
{
kNone
=
0
,
kVMul
=
1
,
kVAdd
=
2
,
kVAddRelu
,
kVSub
,
kVScal
,
kVAddBias
,
kVRelu
,
kVIdentity
,
kVSquare
,
kVExp
,
kVSigmoid
,
kVTanh
,
kLSTMCtHt
,
kLSTMC1H1
,
// sort by alphabet
kCRFDecoding
=
1
,
kEmbSeqPool
=
2
,
kGRUH1
,
kGRUHtPart1
,
kGRUHtPart2
,
kCRFDecoding
,
kHSum
,
// horizontal max
kHMax
,
// horizontal sum
kLSTMCtHt
,
kLSTMC1H1
,
kLayerNorm
,
kMatMul
,
kNCHW16CMulNC
,
kSeqPool
,
kMatMul
,
kHSum
,
// horizontal max
kHMax
,
// horizontal sum
kSoftmax
,
kVAdd
,
kVAddBias
,
kVAddRelu
,
kVExp
,
kVIdentity
,
kVMul
,
kVRelu
,
kVScal
,
kVSigmoid
,
kVSquare
,
kVSub
,
kVTanh
,
}
KernelType
;
typedef
enum
{
...
...
@@ -145,6 +147,32 @@ struct SeqPoolTuples {
typedef
void
(
*
func_type
)(
const
T
*
,
T
*
,
const
seq_pool_attr_t
*
);
};
// Attributes of the embedding seq-pool kernel: dimensions of the lookup
// table, of the index matrix and of the output, plus the pooling type.
typedef struct emb_seq_pool_attr_s {
  int64_t table_height;
  int64_t table_width;
  int64_t index_height;
  int64_t index_width;
  int64_t out_width;
  SeqPoolType pool_type;

  // Leaves all fields uninitialized.
  emb_seq_pool_attr_s() = default;

  // Field-wise constructor; the pooling type defaults to sum.
  explicit emb_seq_pool_attr_s(int64_t table_h, int64_t table_w,
                               int64_t index_h, int64_t index_w,
                               int64_t output_w,
                               SeqPoolType type = SeqPoolType::kSum)
      : table_height(table_h),
        table_width(table_w),
        index_height(index_h),
        index_width(index_w),
        out_width(output_w),
        pool_type(type) {}
} emb_seq_pool_attr_t;
// Type bundle describing the embedding seq-pool kernel for element type T:
// the data type, the attribute type and the kernel function pointer type
// (table, indices, output, attributes).
template <typename T>
struct EmbSeqPoolTuples {
  using data_type = T;
  using attr_type = emb_seq_pool_attr_t;
  using func_type = void (*)(const T*, const int64_t*, T*,
                             const emb_seq_pool_attr_t*);
};
typedef
struct
matmul_attr_s
{
int
m
,
n
,
k
;
void
*
packed_weight
{
nullptr
};
...
...
paddle/fluid/operators/jit/kernel_key.cc
浏览文件 @
5998d3cc
...
...
@@ -56,6 +56,11 @@ size_t JitCodeKey<matmul_attr_t>(const matmul_attr_t& attr) {
return
(
key
<<
shift
*
2
)
+
((
static_cast
<
size_t
>
(
attr
.
n
))
<<
shift
)
+
attr
.
k
;
}
// Cache key for generated embedding seq-pool code: only the table width
// contributes to the key.
template <>
size_t JitCodeKey<emb_seq_pool_attr_t>(const emb_seq_pool_attr_t& attr) {
  const size_t key = attr.table_width;
  return key;
}
}
// namespace jit
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/jit/more/mkl/CMakeLists.txt
浏览文件 @
5998d3cc
...
...
@@ -13,3 +13,4 @@ USE_JITKERNEL_MORE(kVSigmoid, mkl)
USE_JITKERNEL_MORE
(
kVTanh, mkl
)
USE_JITKERNEL_MORE
(
kSeqPool, mkl
)
USE_JITKERNEL_MORE
(
kSoftmax, mkl
)
USE_JITKERNEL_MORE
(
kEmbSeqPool, mkl
)
paddle/fluid/operators/jit/more/mkl/mkl.cc
浏览文件 @
5998d3cc
...
...
@@ -174,6 +174,16 @@ bool SeqPoolKernel<double>::UseMe(const seq_pool_attr_t& attr) const {
return
true
;
}
// The float EmbSeqPool kernel accepts every attribute combination.
template <>
bool EmbSeqPoolKernel<float>::UseMe(
    const emb_seq_pool_attr_t& /*attr*/) const {
  return true;
}

// The double EmbSeqPool kernel accepts every attribute combination.
template <>
bool EmbSeqPoolKernel<double>::UseMe(
    const emb_seq_pool_attr_t& /*attr*/) const {
  return true;
}
template
<
>
bool
MatMulKernel
<
float
>::
UseMe
(
const
matmul_attr_t
&
attr
)
const
{
return
platform
::
MayIUse
(
platform
::
avx
);
...
...
@@ -227,6 +237,7 @@ REGISTER_MKL_KERNEL(kVSquare, VSquare);
REGISTER_MKL_KERNEL
(
kVSigmoid
,
VSigmoid
);
REGISTER_MKL_KERNEL
(
kVTanh
,
VTanh
);
REGISTER_MKL_KERNEL
(
kSeqPool
,
SeqPool
);
REGISTER_MKL_KERNEL
(
kEmbSeqPool
,
EmbSeqPool
);
REGISTER_MKL_KERNEL
(
kSoftmax
,
Softmax
);
#undef REGISTER_MKL_KERNEL
paddle/fluid/operators/jit/more/mkl/mkl.h
浏览文件 @
5998d3cc
...
...
@@ -18,6 +18,7 @@
#include <type_traits>
#include <vector>
#include "paddle/fluid/operators/jit/kernel_base.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -91,6 +92,32 @@ void SeqPool(const T* x, T* y, const seq_pool_attr_t* attr) {
}
}
// Embedding lookup fused with a sum pool.
// For every index column w, the table rows selected by idx[h * index_width + w]
// (h = 0 .. index_height - 1) are accumulated into
// out[w * table_width, (w + 1) * table_width).
// Enforces table_width * index_width == out_width and that every index that
// is read lies in [0, table_height).
template <typename T>
void EmbSeqPool(const T* table, const int64_t* idx, T* out,
                const emb_seq_pool_attr_t* attr) {
  PADDLE_ENFORCE_EQ(attr->table_width * attr->index_width, attr->out_width);

  // Range check for the index stored at flat position i.
  auto check_idx_value_valid = [&](int64_t i) {
    PADDLE_ENFORCE_LT(idx[i], attr->table_height, "idx value: %d, i: %d",
                      idx[i], i);
    PADDLE_ENFORCE_GE(idx[i], 0, "idx value: %d, i: %d", idx[i], i);
  };

  const int64_t row_len = attr->table_width;
  const int64_t n_cols = attr->index_width;
  const int64_t n_rows = attr->index_height;

  // First index row: initialise each output slot with a copy of its table row.
  for (int64_t col = 0; col != n_cols; ++col) {
    check_idx_value_valid(col);
    const T* src_row = table + idx[col] * row_len;
    T* dst_row = out + col * row_len;
    VCopy<T>(src_row, dst_row, row_len);
  }

  // Remaining index rows: add the selected table row onto the output slot
  // (VAXPY with a scale factor of 1).
  for (int64_t row = 1; row < n_rows; ++row) {
    const int64_t row_base = row * n_cols;
    for (int64_t col = 0; col < n_cols; ++col) {
      const int64_t pos = row_base + col;
      check_idx_value_valid(pos);
      const T* src_row = table + idx[pos] * row_len;
      T* dst_row = out + col * row_len;
      VAXPY<T>(static_cast<T>(1), src_row, dst_row, row_len);
    }
  }
}
template
<
typename
T
>
void
ASum
(
const
T
*
x
,
T
*
res
,
int
n
);
...
...
@@ -142,6 +169,8 @@ DECLARE_MKL_KERNEL(VSquare, XYNTuples);
DECLARE_MKL_KERNEL
(
SeqPool
,
SeqPoolTuples
);
DECLARE_MKL_KERNEL
(
EmbSeqPool
,
EmbSeqPoolTuples
);
DECLARE_MKL_KERNEL
(
Softmax
,
SoftmaxTuples
);
#undef DECLARE_MKL_KERNEL
...
...
paddle/fluid/operators/jit/refer/CMakeLists.txt
浏览文件 @
5998d3cc
...
...
@@ -32,3 +32,4 @@ USE_JITKERNEL_REFER(kVSquare)
USE_JITKERNEL_REFER
(
kHSum
)
USE_JITKERNEL_REFER
(
kHMax
)
USE_JITKERNEL_REFER
(
kSoftmax
)
USE_JITKERNEL_REFER
(
kEmbSeqPool
)
paddle/fluid/operators/jit/refer/refer.cc
浏览文件 @
5998d3cc
...
...
@@ -57,4 +57,6 @@ REGISTER_REFER_KERNEL(kHSum, HSum);
REGISTER_REFER_KERNEL
(
kSoftmax
,
Softmax
);
REGISTER_REFER_KERNEL
(
kEmbSeqPool
,
EmbSeqPool
);
#undef REGISTER_REFER_KERNEL
paddle/fluid/operators/jit/refer/refer.h
浏览文件 @
5998d3cc
...
...
@@ -16,6 +16,7 @@
#include <cmath>
#include <limits>
#include <string>
#include "paddle/fluid/operators/jit/helper.h"
#include "paddle/fluid/operators/jit/kernel_base.h"
#include "paddle/fluid/platform/enforce.h"
...
...
@@ -414,6 +415,37 @@ void Softmax(const T* x, T* y, int n, int bs = 1) {
}
}
// Reference implementation of the embedding seq pool (sum).
//   table : matrix of shape (table_height, table_width)
//   idx   : matrix of shape (index_height, index_width)
//   out   : vector of length table_width * index_width
// For every index column w, the table rows selected by idx[h * index_width + w]
// are summed over h into out[w * table_width, (w + 1) * table_width).
template <typename T>
void EmbSeqPool(const T* table, const int64_t* idx, T* out,
                const emb_seq_pool_attr_t* attr) {
  PADDLE_ENFORCE_EQ(attr->table_width * attr->index_width, attr->out_width);

  // Range check for the index stored at flat position i.
  auto check_idx_value_valid = [&](int64_t i) {
    PADDLE_ENFORCE_LT(idx[i], attr->table_height, "idx value: %d, i: %d",
                      idx[i], i);
    PADDLE_ENFORCE_GE(idx[i], 0, "idx value: %d, i: %d", idx[i], i);
  };

  const int64_t row_len = attr->table_width;
  const int64_t n_cols = attr->index_width;
  const int64_t n_rows = attr->index_height;

  // First index row: copy the selected table rows straight into the output.
  for (int64_t col = 0; col != n_cols; ++col) {
    check_idx_value_valid(col);
    T* dst_row = out + col * row_len;
    const T* src_row = table + idx[col] * row_len;
    std::memcpy(dst_row, src_row, row_len * sizeof(T));
  }

  // Remaining index rows: accumulate in place (dst = src + dst).
  for (int64_t row = 1; row < n_rows; ++row) {
    const int64_t row_base = row * n_cols;
    for (int64_t col = 0; col < n_cols; ++col) {
      const int64_t pos = row_base + col;
      check_idx_value_valid(pos);
      T* dst_row = out + col * row_len;
      VAdd(table + idx[pos] * row_len, dst_row, dst_row, row_len);
    }
  }
}
#define DECLARE_REFER_KERNEL(name, tuples) \
template <typename T> \
class name##Kernel : public ReferKernel<tuples<T>> { \
...
...
@@ -462,6 +494,8 @@ DECLARE_REFER_KERNEL(HSum, XRNTuples);
DECLARE_REFER_KERNEL
(
Softmax
,
SoftmaxTuples
);
DECLARE_REFER_KERNEL
(
EmbSeqPool
,
EmbSeqPoolTuples
);
#undef DECLARE_REFER_KERNEL
}
// namespace refer
...
...
paddle/fluid/operators/jit/test.cc
浏览文件 @
5998d3cc
...
...
@@ -270,6 +270,32 @@ struct TestFuncWithRefer<jit::SeqPoolTuples<T>, std::vector<T>, std::vector<T>,
}
};
// Runs one EmbSeqPool implementation (tgt) on the given table/indices and
// compares its output against the reference output `oref`.
template <typename T>
struct TestFuncWithRefer<jit::EmbSeqPoolTuples<T>, std::vector<T>,
                         std::vector<int64_t>, std::vector<T>,
                         typename jit::EmbSeqPoolTuples<T>::attr_type> {
  void operator()(const typename jit::EmbSeqPoolTuples<T>::func_type tgt,
                  const std::vector<T>& table, const std::vector<int64_t>& idx,
                  const std::vector<T>& oref,
                  const typename jit::EmbSeqPoolTuples<T>::attr_type& attr) {
    EXPECT_TRUE(tgt != nullptr);
    // Buffer sizes must agree with the dimensions recorded in attr.
    EXPECT_EQ(table.size(),
              static_cast<size_t>(attr.table_height * attr.table_width));
    EXPECT_EQ(idx.size(),
              static_cast<size_t>(attr.index_height * attr.index_width));
    EXPECT_EQ(oref.size(),
              static_cast<size_t>(attr.table_width * attr.index_width));

    const int out_len = oref.size();
    std::vector<T> actual(out_len);
    tgt(table.data(), idx.data(), actual.data(), &attr);
    ExpectEQ<T>(actual.data(), oref.data(), out_len);
  }
};
template
<
typename
T
>
struct
TestFuncWithRefer
<
jit
::
MatMulTuples
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
...
...
@@ -292,6 +318,63 @@ struct TestFuncWithRefer<jit::MatMulTuples<T>, std::vector<T>, std::vector<T>,
}
};
// Runs one LayerNorm implementation (tgt) and compares its output against the
// reference output `outref`. x, mean and var are handed to the kernel through
// mutable pointers, hence the non-const references.
template <typename T>
struct TestFuncWithRefer<jit::LayerNormTuples<T>, std::vector<T>,
                         std::vector<T>, std::vector<T>, std::vector<T>,
                         std::vector<T>, std::vector<T>, int, float, int> {
  void operator()(const typename jit::LayerNormTuples<T>::func_type tgt,
                  std::vector<T>& x, std::vector<T>& outref,  // NOLINT
                  std::vector<T>& mean, std::vector<T>& var,  // NOLINT
                  const std::vector<T>& scale, const std::vector<T>& bias,
                  int left, const float epsilon, int right) {
    EXPECT_TRUE(tgt != nullptr);
    // x/out are (left x right); mean/var have `left` entries; scale/bias
    // have `right` entries.
    EXPECT_EQ(x.size(), static_cast<size_t>(left * right));
    EXPECT_EQ(outref.size(), static_cast<size_t>(left * right));
    EXPECT_EQ(mean.size(), static_cast<size_t>(left));
    EXPECT_EQ(var.size(), static_cast<size_t>(left));
    EXPECT_EQ(scale.size(), static_cast<size_t>(right));
    EXPECT_EQ(bias.size(), static_cast<size_t>(right));

    std::vector<T> actual(outref.size());
    tgt(x.data(), actual.data(), mean.data(), var.data(), scale.data(),
        bias.data(), left, epsilon, right);
    ExpectEQ<T>(actual.data(), outref.data(), left * right);
  }
};
// Runs one CRFDecoding implementation (tgt) and compares the alpha and track
// buffers it produces against the reference buffers `alpharef` / `trackref`.
template <typename T>
struct TestFuncWithRefer<jit::CRFDecodingTuples<T>, int, std::vector<T>,
                         std::vector<T>, std::vector<T>, std::vector<int>,
                         int> {
  void operator()(const typename jit::CRFDecodingTuples<T>::func_type tgt,
                  const int seq_len, const std::vector<T>& x,
                  const std::vector<T>& w, std::vector<T>& alpharef,  // NOLINT
                  std::vector<int>& trackref, int tag_num) {          // NOLINT
    constexpr int state_trans_base_idx = 2;
    EXPECT_TRUE(tgt != nullptr);
    // x, alpha and track are (seq_len x tag_num); w carries
    // state_trans_base_idx extra rows of tag_num entries.
    EXPECT_EQ(x.size(), static_cast<size_t>(seq_len * tag_num));
    EXPECT_EQ(w.size(),
              static_cast<size_t>((tag_num + state_trans_base_idx) * tag_num));
    EXPECT_EQ(alpharef.size(), static_cast<size_t>(seq_len * tag_num));
    EXPECT_EQ(trackref.size(), static_cast<size_t>(seq_len * tag_num));

    std::vector<T> alphatgt(alpharef.size());
    std::vector<int> tracktgt(trackref.size());

    // Seed the first tag_num entries of the target track buffer from the
    // reference so both buffers start out identical there (presumably the
    // kernel leaves that first row untouched -- confirm against the kernel).
    // The copy previously ran in the opposite direction, which overwrote the
    // reference data with the zero-initialised target buffer.
    memcpy(tracktgt.data(), trackref.data(), tag_num * sizeof(int));

    tgt(seq_len, (const T*)x.data(), (const T*)w.data(), alphatgt.data(),
        tracktgt.data(), tag_num);
    ExpectEQ<T>(alpharef.data(), alphatgt.data(), seq_len * tag_num);
    ExpectEQ<int>(trackref.data(), tracktgt.data(), seq_len * tag_num);
  }
};
template
<
jit
::
KernelType
KT
,
typename
KernelTuples
,
typename
PlaceType
,
typename
...
Args
>
void
TestAllImpls
(
const
typename
KernelTuples
::
attr_type
&
attr
,
Args
...
args
)
{
...
...
@@ -587,6 +670,40 @@ void TestSoftmaxKernel() {
}
}
// Exercises every registered EmbSeqPool implementation against the reference
// kernel, for each table width in TestSizes() and a fixed set of index matrix
// shapes. The table has 1e4 rows filled with random values in [-2, 2];
// indices are drawn from [0, tbl_h - 1].
template <jit::KernelType KT, typename T, typename PlaceType>
void TestEmbSeqPoolKernel() {
  VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
  const int64_t tbl_h = 1e4;
  // Only the sum pool is supported so far.
  const std::vector<jit::SeqPoolType> pool_types = {jit::SeqPoolType::kSum};

  for (int tbl_w : TestSizes()) {
    const int64_t table_len = tbl_h * tbl_w;
    std::vector<T> table(table_len);
    RandomVec<T>(table_len, table.data(), -2.f, 2.f);

    for (auto type : pool_types) {
      for (int idx_w : {1, 2, 10, 16}) {
        for (int idx_h : {1, 2, 9, 13, 16}) {
          auto ref = jit::GetRefer<KT, jit::EmbSeqPoolTuples<T>>();
          EXPECT_TRUE(ref != nullptr);

          const int idx_len = idx_h * idx_w;
          std::vector<int64_t> idx(idx_len);
          RandomVec<int64_t>(idx_len, idx.data(), 0, tbl_h - 1);

          const int64_t out_w = tbl_w * idx_w;
          std::vector<T> oref(out_w);
          jit::emb_seq_pool_attr_t attr(tbl_h, tbl_w, idx_h, idx_w, out_w,
                                        type);

          // Produce the expected output with the reference kernel, then
          // check every other implementation against it.
          ref(table.data(), idx.data(), oref.data(), &attr);
          TestAllImpls<KT, jit::EmbSeqPoolTuples<T>, PlaceType, std::vector<T>,
                       std::vector<int64_t>, std::vector<T>>(attr, table, idx,
                                                             oref, attr);
        }
      }
    }
  }
}
template
<
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
TestNCHW16CMulNCKernel
()
{
VLOG
(
10
)
<<
"===== Test JITKernel "
<<
jit
::
to_string
(
KT
);
...
...
@@ -640,6 +757,71 @@ void TestNCHW16CMulNCKernel() {
}
}
// Exercises every registered LayerNorm implementation against the reference
// kernel. `left` is n * x_dim_0 and `right` comes from TestSizes(); all
// buffers except the output are filled with random values in [-2, 2].
template <paddle::operators::jit::KernelType KT, typename T,
          typename PlaceType>
void TestLayerNormKernel() {
  VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
  const T epsilon = 9.99999975e-06;

  for (int n : {1, 2, 10}) {
    for (int x_dim_0 : {1, 9, 17, 50}) {
      const int left = n * x_dim_0;
      for (int x_dim_1 : TestSizes()) {
        const int right = x_dim_1;
        auto ref = jit::GetRefer<KT, jit::LayerNormTuples<T>>();
        EXPECT_TRUE(ref != nullptr);

        const int total = left * right;
        std::vector<T> x(total);
        std::vector<T> mean(left);
        std::vector<T> var(left);
        std::vector<T> scale(right);
        std::vector<T> bias(right);
        std::vector<T> outref(total);

        RandomVec<T>(total, x.data(), -2.f, 2.f);
        RandomVec<T>(left, mean.data(), -2.f, 2.f);
        RandomVec<T>(left, var.data(), -2.f, 2.f);
        RandomVec<T>(right, scale.data(), -2.f, 2.f);
        RandomVec<T>(right, bias.data(), -2.f, 2.f);

        // Produce the expected output with the reference kernel, then check
        // every other implementation against it.
        ref(x.data(), outref.data(), mean.data(), var.data(), scale.data(),
            bias.data(), left, epsilon, right);

        TestAllImpls<KT, jit::LayerNormTuples<T>, PlaceType, std::vector<T>,
                     std::vector<T>, std::vector<T>, std::vector<T>,
                     std::vector<T>, std::vector<T>, int, float>(
            right, x, outref, mean, var, scale, bias, left, epsilon, right);
      }
    }
  }
}
template
<
paddle
::
operators
::
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
TestCRFDecodingKernel
()
{
VLOG
(
10
)
<<
"===== Test JITKernel "
<<
jit
::
to_string
(
KT
);
constexpr
int
state_trans_base_idx
=
2
;
for
(
int
seq_len
:
{
1
,
11
,
17
,
50
})
{
for
(
int
tag_num
:
TestSizes
())
{
auto
ref
=
jit
::
GetRefer
<
KT
,
jit
::
CRFDecodingTuples
<
T
>>
();
EXPECT_TRUE
(
ref
!=
nullptr
);
int
x_sz
=
seq_len
*
tag_num
;
int
w_sz
=
(
tag_num
+
state_trans_base_idx
)
*
tag_num
;
std
::
vector
<
T
>
x
(
x_sz
),
w
(
w_sz
),
alpharef
(
x_sz
);
std
::
vector
<
int
>
trackref
(
x_sz
);
RandomVec
<
T
>
(
x_sz
,
x
.
data
(),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
w_sz
,
w
.
data
(),
-
2.
f
,
2.
f
);
ref
(
seq_len
,
(
const
T
*
)
x
.
data
(),
(
const
T
*
)
w
.
data
(),
alpharef
.
data
(),
trackref
.
data
(),
tag_num
);
TestAllImpls
<
KT
,
jit
::
CRFDecodingTuples
<
T
>
,
PlaceType
,
int
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
int
>
,
int
>
(
tag_num
,
seq_len
,
x
,
w
,
alpharef
,
trackref
,
tag_num
);
}
}
}
// XYZNTuple
TEST
(
JITKernel
,
kVMul
)
{
TestXYZNKernel
<
jit
::
kVMul
,
float
,
CPUPlace
>
();
...
...
@@ -756,12 +938,26 @@ TEST(JITKernel, kSoftmax) {
TestSoftmaxKernel
<
jit
::
kSoftmax
,
double
,
CPUPlace
>
();
}
// Embedding seq-pool kernels, checked for float and double on CPU.
TEST(JITKernel, kEmbSeqPool) {
  constexpr jit::KernelType kType = jit::kEmbSeqPool;
  TestEmbSeqPoolKernel<kType, float, CPUPlace>();
  TestEmbSeqPoolKernel<kType, double, CPUPlace>();
}
// NCHW16C-mul-NC kernels, checked for float and double on CPU.
TEST(JITKernel, kNCHW16CMulNC) {
  constexpr jit::KernelType kType = jit::kNCHW16CMulNC;
  TestNCHW16CMulNCKernel<kType, float, CPUPlace>();
  TestNCHW16CMulNCKernel<kType, double, CPUPlace>();
}
// TODO(yihua/TJ): add crf decoding and layer norm unit tests
// Layer-norm kernels, checked for float and double on CPU.
TEST(JITKernel, kLayerNorm) {
  using Place = paddle::platform::CPUPlace;
  TestLayerNormKernel<jit::kLayerNorm, float, Place>();
  TestLayerNormKernel<jit::kLayerNorm, double, Place>();
}
// CRF-decoding kernels, checked for float and double on CPU.
TEST(JITKernel, kCRFDecoding) {
  using Place = paddle::platform::CPUPlace;
  TestCRFDecodingKernel<jit::kCRFDecoding, float, Place>();
  TestCRFDecodingKernel<jit::kCRFDecoding, double, Place>();
}
TEST
(
JITKernel
,
pool
)
{
// TODO(TJ): add some test
...
...
paddle/fluid/operators/load_combine_op.cc
浏览文件 @
5998d3cc
...
...
@@ -64,7 +64,7 @@ class LoadCombineOp : public framework::OperatorBase {
auto
*
tensor
=
out_var
->
GetMutable
<
framework
::
LoDTensor
>
();
// Error checking
PADDLE_ENFORCE
(
static_cast
<
bool
>
(
buffer
),
"Cannot read more"
);
PADDLE_ENFORCE
(
static_cast
<
bool
>
(
*
buffer
),
"Cannot read more"
);
// Get data from fin to tensor
DeserializeFromStream
(
*
buffer
,
tensor
,
dev_ctx
);
...
...
@@ -90,6 +90,10 @@ class LoadCombineOp : public framework::OperatorBase {
tensor
->
ShareDataWith
(
fp16_tensor
);
}
}
buffer
->
peek
();
PADDLE_ENFORCE
(
buffer
->
eof
(),
"You are not allowed to load partial data via "
"load_combine_op, use load_op instead."
);
}
};
...
...
paddle/fluid/operators/lstm_op.h
浏览文件 @
5998d3cc
...
...
@@ -311,6 +311,10 @@ class LSTMGradKernel : public framework::OpKernel<T> {
lstm_grad
.
prev_state_grad
=
c0_g
?
ordered_c0_g
.
data
<
T
>
()
:
nullptr
;
}
// lstm_value.output_value not used in bp, set to nullptr
// lstm_grad.state_active_grad not used in bp, set to nullptr
lstm_value
.
output_value
=
nullptr
;
lstm_grad
.
state_active_grad
=
nullptr
;
int
cur_batch_size
=
bend
-
bstart
;
math
::
LstmUnitGradFunctor
<
DeviceContext
,
T
>::
compute
(
device_ctx
,
lstm_value
,
lstm_grad
,
frame_size
,
cur_batch_size
,
...
...
paddle/fluid/operators/lstmp_op.h
浏览文件 @
5998d3cc
...
...
@@ -405,6 +405,11 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
}
int
cur_batch_size
=
bend
-
bstart
;
// lstmp_value.output_value not used in bp, set to null
// lstmp_grad.state_active_grad not used in bp, set to null
lstmp_value
.
output_value
=
nullptr
;
lstmp_grad
.
state_active_grad
=
nullptr
;
math
::
LstmUnitGradFunctor
<
DeviceContext
,
T
>::
compute
(
device_ctx
,
lstmp_value
,
lstmp_grad
,
frame_size
,
cur_batch_size
,
gate_act
,
cell_act
,
cand_act
);
...
...
paddle/fluid/operators/ngraph/ngraph_bridge.cc
浏览文件 @
5998d3cc
...
...
@@ -36,6 +36,8 @@ std::map<std::string,
{
"conv2d_grad"
,
NG_OPS
::
BuildConv2dGradNode
},
{
"batch_norm"
,
NG_OPS
::
BuildBatchNormNode
},
{
"batch_norm_grad"
,
NG_OPS
::
BuildBatchNormGradNode
},
{
"cross_entropy"
,
NG_OPS
::
BuildCrossEntropyNode
},
{
"cross_entropy_grad"
,
NG_OPS
::
BuildCrossEntropyGradNode
},
{
"elementwise_add"
,
NG_OPS
::
BuildElementwiseAddNode
},
{
"elementwise_add_grad"
,
NG_OPS
::
BuildElementwiseAddGradNode
},
{
"fill_constant"
,
NG_OPS
::
BuildFillConstantNode
},
...
...
paddle/fluid/operators/ngraph/ngraph_ops.h
浏览文件 @
5998d3cc
...
...
@@ -26,6 +26,7 @@ limitations under the License. */
#include "ops/batch_norm_op.h"
#include "ops/binary_unary_op.h"
#include "ops/conv2d_op.h"
#include "ops/cross_entropy_op.h"
#include "ops/elementwise_add_op.h"
#include "ops/fill_constant_op.h"
#include "ops/mean_op.h"
...
...
paddle/fluid/operators/ngraph/ops/batch_norm_op.h
浏览文件 @
5998d3cc
...
...
@@ -44,6 +44,10 @@ void BuildBatchNormNode(
const
float
epsilon
=
op_attrs
.
Get
<
float
>
(
"epsilon"
);
const
float
momentum
=
op_attrs
.
Get
<
float
>
(
"momentum"
);
PADDLE_ENFORCE
(
data_layout
==
"NHWC"
||
data_layout
==
"NCHW"
||
data_layout
==
"NC"
,
"The BatchNorm operator only supports NHWC/NCHW/NC data format"
);
if
(
data_layout
==
"NHWC"
)
{
x
=
paddle
::
platform
::
Nhwc2Nchw
(
x
);
}
...
...
@@ -110,6 +114,9 @@ void BuildBatchNormGradNode(
"BN grap input size needs to be 2 or 4"
);
PADDLE_ENFORCE_EQ
(
x_shape
.
size
(),
dy_shape
.
size
(),
"BN grap input and delta size needs to be equal"
);
PADDLE_ENFORCE
(
data_layout
==
"NHWC"
||
data_layout
==
"NCHW"
||
data_layout
==
"NC"
,
"The BatchNorm operator only supports NHWC/NCHW/NC data format"
);
if
(
x_shape
.
size
()
==
2
)
{
x
=
std
::
make_shared
<
ngraph
::
op
::
Reshape
>
(
...
...
paddle/fluid/operators/ngraph/ops/cross_entropy_op.h
0 → 100644
浏览文件 @
5998d3cc
/*Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <functional>
#include <string>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/platform/ngraph_helper.h"
namespace
paddle
{
namespace
operators
{
namespace
ngraphs
{
// Builds the nGraph sub-graph for the forward cross_entropy op and registers
// the result as output "Y".
//
// Inputs "X" and "Label" are looked up in ngb_node_map. Inputs with rank > 2
// are flattened to 2-D first. With hard labels (soft_label == false) the
// label is expanded to a one-hot tensor of X's 2-D shape, and rows whose
// label equals `ignore_index` are masked to zero. The result is
// -sum(label * clip(log(X), -1e20, 1e20)) along axis 1, reshaped to
// {batch_size, 1}.
void BuildCrossEntropyNode(
    const std::shared_ptr<paddle::framework::OperatorBase>& op,
    std::shared_ptr<
        std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
        ngb_node_map) {
  auto x = paddle::platform::GetInputNode(op, "X", ngb_node_map);
  auto label = paddle::platform::GetInputNode(op, "Label", ngb_node_map);
  auto label_shape = label->get_shape();
  auto x_shape = x->get_shape();
  auto label_rank = label_shape.size();
  auto x_rank = x_shape.size();
  std::shared_ptr<ngraph::Node> x_2d = x, label_2d = label;
  auto label_2d_shape = label_shape, x_2d_shape = x_shape;

  // Collapse higher-rank inputs to 2-D.
  if (label_rank > 2) {
    label_2d_shape =
        paddle::platform::FlattenTo2d(label_shape, label_rank - 1);
    label_2d = paddle::platform::NgReshaper(label, label_2d_shape);
  }
  if (x_rank > 2) {
    x_2d_shape = paddle::platform::FlattenTo2d(x_shape, x_rank - 1);
    x_2d = paddle::platform::NgReshaper(x, x_2d_shape);
  }

  auto batch_size = x_2d_shape.at(0);
  auto op_attrs = paddle::framework::AttrReader(op->Attrs());
  const bool is_soft_label = op_attrs.Get<bool>("soft_label");

  // Hard labels are turned into a one-hot tensor; soft labels are used as-is.
  std::shared_ptr<ngraph::Node> node_1_hot = label_2d;
  if (!is_soft_label) {
    auto label_1d = paddle::platform::NgReshaper(
        label_2d, ngraph::Shape{label_2d_shape.at(0)});
    node_1_hot = std::make_shared<ngraph::op::OneHot>(label_1d, x_2d_shape, 1);
  }
  if (x->get_element_type() != node_1_hot->get_element_type()) {
    node_1_hot = std::make_shared<ngraph::op::Convert>(node_1_hot,
                                                       x->get_element_type());
  }

  // log(X), clipped to [-1e20, 1e20].
  auto node_log = std::make_shared<ngraph::op::Log>(x_2d);
  auto high_clip = ngraph::op::Constant::create(node_log->get_element_type(),
                                                node_log->get_shape(), {1e20});
  auto low_clip = ngraph::op::Constant::create(node_log->get_element_type(),
                                               node_log->get_shape(), {-1e20});
  auto node_min = std::make_shared<ngraph::op::Minimum>(node_log, high_clip);
  auto node_max = std::make_shared<ngraph::op::Maximum>(node_min, low_clip);

  // Multiply by the clipped log. The unclipped node_log was used here before,
  // which left the clip nodes dead and let 0 * log(0) = 0 * -inf produce NaN.
  auto node_mul = node_1_hot * node_max;
  auto node_sum =
      std::make_shared<ngraph::op::Sum>(node_mul, ngraph::AxisSet{1});
  auto node_neg = std::make_shared<ngraph::op::Negative>(node_sum);
  auto xe =
      paddle::platform::NgReshaper(node_neg, ngraph::Shape{batch_size, 1});

  // Zero the loss of rows whose hard label equals ignore_index.
  if (!is_soft_label) {
    auto ignore_index = op_attrs.Get<int>("ignore_index");
    auto ignore_node = ngraph::op::Constant::create(
        label->get_element_type(), label_2d_shape, {ignore_index});
    auto not_equal_node =
        std::make_shared<ngraph::op::NotEqual>(label_2d, ignore_node);
    auto mask = std::make_shared<ngraph::op::Convert>(not_equal_node,
                                                      xe->get_element_type());
    xe = xe * mask;
  }
  paddle::platform::SetOutputNode(op, "Y", xe, ngb_node_map);
}
// Builds the nGraph sub-graph for the cross_entropy gradient and registers it
// as output "X@GRAD".
//
// The gradient is -label * dY / X, with dY broadcast to X's shape. With hard
// labels (soft_label == false) the label is first expanded to a one-hot
// tensor along the last axis of X, and positions whose label equals
// `ignore_index` are masked to zero.
void BuildCrossEntropyGradNode(
    const std::shared_ptr<paddle::framework::OperatorBase>& op,
    std::shared_ptr<
        std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
        ngb_node_map) {
  namespace pf = paddle::platform;
  namespace ngop = ngraph::op;

  auto op_attrs = paddle::framework::AttrReader(op->Attrs());
  const bool is_soft_label = op_attrs.Get<bool>("soft_label");

  auto x = pf::GetInputNode(op, "X", ngb_node_map);
  auto label = pf::GetInputNode(op, "Label", ngb_node_map);
  auto dy = pf::GetInputNode(op, "Y@GRAD", ngb_node_map);

  auto x_shape = x->get_shape();
  auto rank = x_shape.size();
  const auto last_axis = rank - 1;

  // Only populated (and only used) for hard labels.
  std::shared_ptr<ngraph::Node> mask;
  if (!is_soft_label) {
    // Drop the trailing label dimension before comparing / one-hot encoding.
    auto label_shape = label->get_shape();
    label_shape.pop_back();
    label = pf::NgReshaper(label, label_shape);

    auto ignore_index = op_attrs.Get<int>("ignore_index");
    auto ignore_node = ngop::Constant::create(label->get_element_type(),
                                              label_shape, {ignore_index});
    auto not_equal_node = std::make_shared<ngop::NotEqual>(label, ignore_node);
    mask = std::make_shared<ngop::Convert>(not_equal_node,
                                           x->get_element_type());
    mask = std::make_shared<ngop::Broadcast>(mask, x_shape,
                                             ngraph::AxisSet{last_axis});
    label = std::make_shared<ngop::OneHot>(label, x_shape, last_axis);
  }

  // Drop dY's trailing dimension, then broadcast it along X's last axis.
  auto dy_shape = dy->get_shape();
  dy_shape.pop_back();
  auto dy_reshape = pf::NgReshaper(dy, dy_shape);
  auto dy_bcast = std::make_shared<ngop::Broadcast>(
      dy_reshape, x_shape, ngraph::AxisSet{last_axis});

  // The label must share X's element type before the arithmetic below.
  if (x->get_element_type() != label->get_element_type()) {
    label = std::make_shared<ngop::Convert>(label, x->get_element_type());
  }

  auto xe_grad = -label * dy_bcast / x;
  if (!is_soft_label) {
    xe_grad = xe_grad * mask;
  }
  pf::SetOutputNode(op, "X@GRAD", xe_grad, ngb_node_map);
}
}
// namespace ngraphs
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/ngraph/ops/fill_constant_op.h
浏览文件 @
5998d3cc
...
...
@@ -46,8 +46,6 @@ void BuildFillConstantNode(
ng_dtype
=
ngraph
::
element
::
i64
;
}
else
if
(
data_type
==
paddle
::
framework
::
proto
::
VarType
::
INT32
)
{
ng_dtype
=
ngraph
::
element
::
i32
;
}
else
if
(
data_type
==
paddle
::
framework
::
proto
::
VarType
::
BOOL
)
{
ng_dtype
=
ngraph
::
element
::
boolean
;
}
else
{
PADDLE_THROW
(
"unsupported data type: %s"
,
data_type
);
}
...
...
paddle/fluid/operators/row_conv_op.cc
浏览文件 @
5998d3cc
...
...
@@ -109,23 +109,23 @@ from future subsequences in a computationally efficient manner to improve
unidirectional recurrent neural networks. The row convolution operator is
different from the 1D sequence convolution, and is computed as follows:
Given an input sequence $
in$ of length $t$ and input dimension $d
$,
and a filter ($W$) of size $context \times
d$,
Given an input sequence $
X$ of length $t$ and input dimension $D
$,
and a filter ($W$) of size $context \times
D$,
the output sequence is convolved as:
$$
out_{i
, :} = \\sum_{j=i}^{i + context} in_{j,:} \\cdot W_{i-j, :
}
out_{i
} = \\sum_{j=i}^{i + context - 1} X_{j} \\cdot W_{j-i
}
$$
In the above equation:
* $Out_{i}$: The i-th row of output variable with shape [1, D].
* $
\\tau
$: Future context size.
* $
context
$: Future context size.
* $X_{j}$: The j-th row of input variable with shape [1, D].
* $W_{
i-j}$: The (i-j
)-th row of parameters with shape [1, D].
* $W_{
j-i}$: The (j-i
)-th row of parameters with shape [1, D].
More details about row_conv please refer to
the design document
...
...
paddle/fluid/platform/enforce.h
浏览文件 @
5998d3cc
...
...
@@ -234,8 +234,10 @@ inline void throw_on_error(ncclResult_t stat, const std::string& msg) {
#endif // PADDLE_WITH_CUDA
#define PADDLE_THROW(...) \
do { \
throw ::paddle::platform::EnforceNotMet( \
::paddle::string::Sprintf(__VA_ARGS__), __FILE__, __LINE__)
::paddle::string::Sprintf(__VA_ARGS__), __FILE__, __LINE__); \
} while (0)
#define PADDLE_ENFORCE(COND, ...) \
do { \
...
...
@@ -274,19 +276,21 @@ inline void throw_on_error(ncclResult_t stat, const std::string& msg) {
do { \
if (UNLIKELY(nullptr == (__VAL))) { \
PADDLE_THROW(#__VAL " should not be null\n%s", \
paddle::string::Sprintf(""
__VA_ARGS__)); \
::paddle::string::Sprintf(
__VA_ARGS__)); \
} \
} while (0)
#define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \
do { \
if (UNLIKELY(!((__VAL0)__CMP(__VAL1)))) { \
auto __cond1__ = (__VAL0); \
auto __cond2__ = (__VAL1); \
if (UNLIKELY(!((__cond1__)__CMP(__cond2__)))) { \
PADDLE_THROW("Enforce failed. Expected %s " #__CMP \
" %s, but received %s:%s " #__INV_CMP " %s:%s.\n%s", \
#__VAL0, #__VAL1, #__VAL0, \
paddle::string::to_string(__VAL0), #__VAL1,
\
paddle::string::to_string(__VAL1),
\
paddle::string::Sprintf("" __VA_ARGS__));
\
::paddle::string::to_string(__cond1__), #__VAL1,
\
::paddle::string::to_string(__cond2__),
\
::paddle::string::Sprintf(__VA_ARGS__));
\
} \
} while (0)
...
...
paddle/fluid/pybind/ir.cc
浏览文件 @
5998d3cc
...
...
@@ -13,10 +13,12 @@
// limitations under the License.
#include "paddle/fluid/pybind/ir.h"
#include <algorithm>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/framework/op_desc.h"
...
...
@@ -27,6 +29,10 @@ namespace py = pybind11;
using
paddle
::
framework
::
ir
::
Graph
;
using
paddle
::
framework
::
ir
::
Node
;
using
paddle
::
framework
::
ir
::
GraphSafeRemoveNodes
;
using
paddle
::
framework
::
ir
::
HasCircle
;
using
paddle
::
framework
::
ir
::
GraphNum
;
using
paddle
::
framework
::
ir
::
TopologySortOperations
;
using
paddle
::
framework
::
ir
::
BuildOperationAdjList
;
using
paddle
::
framework
::
OpDesc
;
using
paddle
::
framework
::
ProgramDesc
;
using
paddle
::
framework
::
VarDesc
;
...
...
@@ -36,6 +42,12 @@ namespace paddle {
namespace
pybind
{
void
BindGraph
(
py
::
module
*
m
)
{
m
->
def
(
"graph_safe_remove_nodes"
,
GraphSafeRemoveNodes
);
m
->
def
(
"has_circle"
,
HasCircle
);
m
->
def
(
"graph_num"
,
GraphNum
);
m
->
def
(
"topology_sort"
,
TopologySortOperations
,
return_value_policy
::
reference
);
m
->
def
(
"build_adjacency_list"
,
BuildOperationAdjList
,
return_value_policy
::
reference
);
py
::
class_
<
Graph
,
std
::
shared_ptr
<
Graph
>>
(
*
m
,
"Graph"
,
"The graph is a Directed Acyclic Single Static Assignment Graph, see "
...
...
@@ -46,7 +58,6 @@ void BindGraph(py::module *m) {
.
def
(
"get_float"
,
&
Graph
::
Get
<
float
>
)
.
def
(
"get_double"
,
&
Graph
::
Get
<
double
>
)
.
def
(
"get_string"
,
&
Graph
::
Get
<
std
::
string
>
)
.
def
(
"get_program"
,
&
Graph
::
Get
<
ProgramDesc
>
)
.
def
(
"get_marked_nodes"
,
&
Graph
::
Get
<
std
::
unordered_set
<
const
Node
*>>
)
.
def
(
"set"
,
[](
Graph
&
self
,
const
std
::
string
&
attr_name
,
int
attr
)
{
return
self
.
Set
(
attr_name
,
new
int
(
attr
));
})
...
...
@@ -63,11 +74,6 @@ void BindGraph(py::module *m) {
[](
Graph
&
self
,
const
std
::
string
&
attr_name
,
double
attr
)
{
return
self
.
Set
(
attr_name
,
new
double
(
attr
));
})
.
def
(
"set"
,
[](
Graph
&
self
,
const
std
::
string
&
attr_name
,
const
ProgramDesc
&
attr
)
{
return
self
.
Set
(
attr_name
,
new
ProgramDesc
(
attr
));
})
.
def
(
"set"
,
[](
Graph
&
self
,
const
std
::
string
&
attr_name
,
const
std
::
unordered_set
<
const
Node
*>
&
attr
)
{
...
...
@@ -108,42 +114,42 @@ void BindNode(py::module *m) {
.
def
(
"is_op"
,
&
Node
::
IsOp
)
.
def
(
"is_var"
,
&
Node
::
IsVar
)
.
def
(
"is_ctrl_var"
,
&
Node
::
IsCtrlVar
)
.
def
(
"clear_inputs"
,
[](
Node
&
self
)
{
self
.
inputs
.
clear
();
})
.
def
(
"inputs_remove"
,
[](
Node
&
self
,
int
node_id
)
{
for
(
auto
it
=
self
.
inputs
.
begin
();
it
!=
self
.
inputs
.
end
();
it
++
)
{
if
((
*
it
)
->
id
()
==
node_id
)
{
self
.
inputs
.
erase
(
it
);
}
auto
pos
=
std
::
find_if
(
self
.
inputs
.
begin
(),
self
.
inputs
.
end
(),
[
&
node_id
](
const
Node
*
n
)
{
return
n
->
id
()
==
node_id
;
});
if
(
pos
!=
self
.
inputs
.
end
())
{
self
.
inputs
.
erase
(
pos
);
}
})
.
def
(
"inputs_remove"
,
[](
Node
&
self
,
Node
&
node
)
{
for
(
auto
it
=
self
.
inputs
.
begin
();
it
!=
self
.
inputs
.
end
();
it
++
)
{
if
(
*
it
==
&
node
)
{
self
.
inputs
.
erase
(
it
);
}
auto
pos
=
std
::
find
(
self
.
inputs
.
begin
(),
self
.
inputs
.
end
(),
&
node
);
if
(
pos
!=
self
.
inputs
.
end
())
{
self
.
inputs
.
erase
(
pos
);
}
})
.
def
(
"inputs_append"
,
[](
Node
&
self
,
Node
&
node
)
{
self
.
inputs
.
push_back
(
&
node
);
})
.
def
(
"clear_outputs"
,
[](
Node
&
self
)
{
self
.
outputs
.
clear
();
})
.
def
(
"outputs_remove"
,
[](
Node
&
self
,
int
node_id
)
{
for
(
auto
it
=
self
.
outputs
.
begin
();
it
!=
self
.
outputs
.
end
();
it
++
)
{
if
((
*
it
)
->
id
()
==
node_id
)
{
self
.
outputs
.
erase
(
it
);
}
auto
pos
=
std
::
find_if
(
self
.
outputs
.
begin
(),
self
.
outputs
.
end
(),
[
&
node_id
](
const
Node
*
n
)
{
return
n
->
id
()
==
node_id
;
});
if
(
pos
!=
self
.
outputs
.
end
())
{
self
.
outputs
.
erase
(
pos
);
}
})
.
def
(
"outputs_remove"
,
[](
Node
&
self
,
Node
&
node
)
{
for
(
auto
it
=
self
.
outputs
.
begin
();
it
!=
self
.
outputs
.
end
();
it
++
)
{
if
(
*
it
==
&
node
)
{
self
.
outputs
.
erase
(
it
);
}
auto
pos
=
std
::
find
(
self
.
outputs
.
begin
(),
self
.
outputs
.
end
(),
&
node
);
if
(
pos
!=
self
.
outputs
.
end
())
{
self
.
outputs
.
erase
(
pos
);
}
})
.
def
(
"outputs_append"
,
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
5998d3cc
...
...
@@ -373,7 +373,13 @@ PYBIND11_MODULE(core, m) {
PADDLE_ENFORCE
(
CheckLoD
(
new_lod
,
vectorize
(
self
.
dims
()).
front
()),
"the provided lod info is invalid"
);
self
.
set_lod
(
new_lod
);
})
},
py
::
arg
(
"lod"
),
R"DOC(
Set LoD of the LoDTensor.
Args:
lod (List[List[int]]): the lod to be set.
)DOC"
)
.
def
(
"set_recursive_sequence_lengths"
,
[](
LoDTensor
&
self
,
const
std
::
vector
<
std
::
vector
<
size_t
>>
&
recursive_sequence_lengths
)
{
...
...
@@ -389,7 +395,17 @@ PYBIND11_MODULE(core, m) {
CheckLoD
(
new_offset_lod
,
vectorize
(
self
.
dims
()).
front
()),
"the provided recursive_sequence_lengths info is invalid"
);
self
.
set_lod
(
new_offset_lod
);
})
},
py
::
arg
(
"recursive_sequence_lengths"
),
R"DOC(
Set LoD of the LoDTensor according to recursive sequence length.
For example, if recursive_sequence_lengths=[[2, 3]], meaning that
there are two sequences with length 2 and 3 respectively, the
corresponding lod would be [[0, 2, 2+3]], i.e, [[0, 2, 5]].
Args:
recursive_sequence_lengths (List[List[int]]): sequence lengths.
)DOC"
)
.
def
(
"lod"
,
[](
LoDTensor
&
self
)
->
std
::
vector
<
std
::
vector
<
size_t
>>
{
// output the offset-based lod info
...
...
@@ -398,7 +414,13 @@ PYBIND11_MODULE(core, m) {
new_lod
.
reserve
(
lod
.
size
());
std
::
copy
(
lod
.
begin
(),
lod
.
end
(),
std
::
back_inserter
(
new_lod
));
return
new_lod
;
})
},
R"DOC(
Return the LoD of the LoDTensor.
Returns:
out (List[List[int]]): the lod of the LoDTensor.
)DOC"
)
// Set above comments of set_lod.
.
def
(
"recursive_sequence_lengths"
,
[](
LoDTensor
&
self
)
->
std
::
vector
<
std
::
vector
<
size_t
>>
{
...
...
@@ -408,12 +430,25 @@ PYBIND11_MODULE(core, m) {
new_lod
.
reserve
(
lod
.
size
());
std
::
copy
(
lod
.
begin
(),
lod
.
end
(),
std
::
back_inserter
(
new_lod
));
return
new_lod
;
})
.
def
(
"has_valid_recursive_sequence_lengths"
,
[](
LoDTensor
&
self
)
->
bool
{
},
R"DOC(
Return the sequence length of the LoDTensor corresponding to LoD.
Returns:
out (List[List[int]): the sequence lengths.
)DOC"
)
.
def
(
"has_valid_recursive_sequence_lengths"
,
[](
LoDTensor
&
self
)
->
bool
{
// Check that the lod info is valid and match the outermost
// dimension of the LoDTensor data
return
CheckLoD
(
self
.
lod
(),
vectorize
(
self
.
dims
()).
front
());
});
},
R"DOC(
Check whether the lod of the LoDTensor is valid.
Returns:
out (bool): whether the lod is valid.
)DOC"
);
py
::
class_
<
SelectedRows
>
(
m
,
"SelectedRows"
)
.
def
(
"__init__"
,
...
...
@@ -549,11 +584,45 @@ All parameter, weight, gradient are variables in Paddle.
[](
Scope
&
self
,
const
std
::
string
&
name
)
->
Variable
*
{
return
self
.
Var
(
name
);
},
py
::
arg
(
"name"
),
R"DOC(
Find or create variable named :code:`name` in the current scope.
If the variable named :code:`name` does not exist in the
current scope, the variable would be created. Otherwise,
return the existing variable.
Args:
name (str): the variable name.
Returns:
out (core.Variable): the found or created variable.
)DOC"
,
py
::
return_value_policy
::
reference
)
.
def
(
"find_var"
,
&
Scope
::
FindVar
,
py
::
arg
(
"name"
),
R"DOC(
Find variable named :code:`name` in the current scope or
its parent scope. Return None if not found.
Args:
name (str): the variable name.
Returns:
out (core.Variable|None): the found variable or None.
)DOC"
,
py
::
return_value_policy
::
reference
)
.
def
(
"find_var"
,
&
Scope
::
FindVar
,
py
::
return_value_policy
::
reference
)
.
def
(
"new_scope"
,
[](
Scope
&
self
)
->
Scope
*
{
return
&
self
.
NewScope
();
},
R"DOC(
Create a new sub-scope of the current scope.
Returns:
out (core._Scope): the created sub-scope.
)DOC"
,
py
::
return_value_policy
::
reference
)
.
def
(
"drop_kids"
,
&
Scope
::
DropKids
);
.
def
(
"drop_kids"
,
&
Scope
::
DropKids
,
R"DOC(
Delete all sub-scopes of the current scope.
)DOC"
);
m
.
def
(
"Scope"
,
[]()
->
Scope
*
{
...
...
@@ -561,6 +630,12 @@ All parameter, weight, gradient are variables in Paddle.
ScopePool
::
Instance
().
Insert
(
std
::
unique_ptr
<
Scope
>
(
s
));
return
s
;
},
R"DOC(
Create a new scope.
Returns:
out (core._Scope): the created scope.
)DOC"
,
py
::
return_value_policy
::
reference
);
//! @note: Be careful! PyBind will return std::string as an unicode, not
...
...
@@ -789,11 +864,13 @@ All parameter, weight, gradient are variables in Paddle.
self
[
i
].
ShareDataWith
(
t
);
self
[
i
].
set_lod
(
t
.
lod
());
})
.
def
(
"append"
,
[](
LoDTensorArray
&
self
,
const
LoDTensor
&
t
)
{
.
def
(
"append"
,
[](
LoDTensorArray
&
self
,
const
LoDTensor
&
t
)
{
self
.
emplace_back
();
self
.
back
().
ShareDataWith
(
t
);
self
.
back
().
set_lod
(
t
.
lod
());
});
},
py
::
arg
(
"tensor"
),
"Append a LoDensor to LoDTensorArray."
);
m
.
def
(
"IsInplace"
,
[](
std
::
string
op
)
->
bool
{
return
operators
::
IsInplace
(
op
);
});
...
...
@@ -829,8 +906,7 @@ All parameter, weight, gradient are variables in Paddle.
m
.
def
(
"disable_profiler"
,
platform
::
DisableProfiler
);
m
.
def
(
"is_profiler_enabled"
,
platform
::
IsProfileEnabled
);
m
.
def
(
"reset_profiler"
,
platform
::
ResetProfiler
);
m
.
def
(
"get_pass"
,
[](
const
py
::
bytes
&
binary_str
)
{
std
::
string
pass_type
(
binary_str
);
m
.
def
(
"get_pass"
,
[](
const
std
::
string
&
pass_type
)
{
auto
pass
=
framework
::
ir
::
PassRegistry
::
Instance
().
Get
(
pass_type
);
return
std
::
shared_ptr
<
framework
::
ir
::
Pass
>
(
std
::
move
(
pass
));
});
...
...
@@ -838,10 +914,9 @@ All parameter, weight, gradient are variables in Paddle.
py
::
class_
<
ir
::
Pass
,
std
::
shared_ptr
<
ir
::
Pass
>>
pass
(
m
,
"Pass"
);
pass
.
def
(
py
::
init
())
.
def
(
"has"
,
&
ir
::
Pass
::
Has
)
.
def
(
"set"
,
[](
ir
::
Pass
&
self
,
const
std
::
string
&
attr_name
,
const
ProgramDesc
&
attr
)
{
return
self
.
Set
(
attr_name
,
new
ProgramDesc
(
attr
));
.
def
(
"set_not_owned"
,
[](
ir
::
Pass
&
self
,
const
std
::
string
&
attr_name
,
ProgramDesc
&
attr
)
{
self
.
SetNotOwned
<
ProgramDesc
>
(
attr_name
,
&
attr
);
})
.
def
(
"set"
,
...
...
@@ -850,7 +925,6 @@ All parameter, weight, gradient are variables in Paddle.
})
.
def
(
"set"
,
[](
ir
::
Pass
&
self
,
const
std
::
string
&
name
,
int
val
)
{
self
.
Set
<
const
int
>
(
name
,
new
int
(
val
));
})
.
def
(
"get_program"
,
&
ir
::
Pass
::
Get
<
ProgramDesc
>
)
.
def
(
"type"
,
&
ir
::
Pass
::
Type
)
.
def
(
"apply"
,
[](
ir
::
Pass
&
self
,
std
::
shared_ptr
<
ir
::
Graph
>
graph
)
{
std
::
unique_ptr
<
ir
::
Graph
>
origin_graph
(
graph
.
get
());
...
...
paddle/scripts/paddle_build.sh
浏览文件 @
5998d3cc
...
...
@@ -88,6 +88,7 @@ function cmake_gen() {
-DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.5/include/python3.5m/
-DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.5/lib/libpython3.5m.dylib"
WITH_FLUID_ONLY
=
${
WITH_FLUID_ONLY
:-
ON
}
pip3.5 uninstall
-y
protobuf
pip3.5
install
--user
-r
${
PADDLE_ROOT
}
/python/requirements.txt
else
exit
1
...
...
@@ -101,6 +102,7 @@ function cmake_gen() {
-DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.6/include/python3.6m/
-DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.6/lib/libpython3.6m.dylib"
WITH_FLUID_ONLY
=
${
WITH_FLUID_ONLY
:-
ON
}
pip3.6 uninstall
-y
protobuf
pip3.6
install
--user
-r
${
PADDLE_ROOT
}
/python/requirements.txt
else
exit
1
...
...
@@ -114,6 +116,7 @@ function cmake_gen() {
-DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.7/include/python3.7m/
-DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.7/lib/libpython3.7m.dylib"
WITH_FLUID_ONLY
=
${
WITH_FLUID_ONLY
:-
ON
}
pip3.7 uninstall
-y
protobuf
pip3.7
install
--user
-r
${
PADDLE_ROOT
}
/python/requirements.txt
else
exit
1
...
...
@@ -128,31 +131,44 @@ function cmake_gen() {
PYTHON_FLAGS
=
"-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27m/bin/python
-DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27m/include/python2.7
-DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs2/lib/libpython2.7.so"
pip uninstall
-y
protobuf
pip
install
-r
${
PADDLE_ROOT
}
/python/requirements.txt
elif
[
"
$1
"
==
"cp27-cp27mu"
]
;
then
export
LD_LIBRARY_PATH
=
/opt/_internal/cpython-2.7.11-ucs4/lib:
${
LD_LIBRARY_PATH
#/opt/_internal/cpython-2.7.11-ucs2/lib
:
}
export
PATH
=
/opt/python/cp27-cp27mu/bin/:
${
PATH
}
PYTHON_FLAGS
=
"-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27mu/bin/python
-DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27mu/include/python2.7
-DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs4/lib/libpython2.7.so"
pip uninstall
-y
protobuf
pip
install
-r
${
PADDLE_ROOT
}
/python/requirements.txt
elif
[
"
$1
"
==
"cp35-cp35m"
]
;
then
export
LD_LIBRARY_PATH
=
/opt/_internal/cpython-3.5.1/lib/:
${
LD_LIBRARY_PATH
}
export
PATH
=
/opt/_internal/cpython-3.5.1/bin/:
${
PATH
}
export
PYTHON_FLAGS
=
"-DPYTHON_EXECUTABLE:FILEPATH=/opt/_internal/cpython-3.5.1/bin/python3
-DPYTHON_INCLUDE_DIR:PATH=/opt/_internal/cpython-3.5.1/include/python3.5m
-DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-3.5.1/lib/libpython3.so"
pip3.5 uninstall
-y
protobuf
pip3.5
install
-r
${
PADDLE_ROOT
}
/python/requirements.txt
elif
[
"
$1
"
==
"cp36-cp36m"
]
;
then
export
LD_LIBRARY_PATH
=
/opt/_internal/cpython-3.6.0/lib/:
${
LD_LIBRARY_PATH
}
export
PATH
=
/opt/_internal/cpython-3.6.0/bin/:
${
PATH
}
export
PYTHON_FLAGS
=
"-DPYTHON_EXECUTABLE:FILEPATH=/opt/_internal/cpython-3.6.0/bin/python3
-DPYTHON_INCLUDE_DIR:PATH=/opt/_internal/cpython-3.6.0/include/python3.6m
-DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-3.6.0/lib/libpython3.so"
pip3.6 uninstall
-y
protobuf
pip3.6
install
-r
${
PADDLE_ROOT
}
/python/requirements.txt
elif
[
"
$1
"
==
"cp37-cp37m"
]
;
then
export
LD_LIBRARY_PATH
=
/opt/_internal/cpython-3.7.0/lib/:
${
LD_LIBRARY_PATH
}
export
PATH
=
/opt/_internal/cpython-3.7.0/bin/:
${
PATH
}
export
PYTHON_FLAGS
=
"-DPYTHON_EXECUTABLE:FILEPATH=/opt/_internal/cpython-3.7.0/bin/python3.7
-DPYTHON_INCLUDE_DIR:PATH=/opt/_internal/cpython-3.7.0/include/python3.7m
-DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-3.7.0/lib/libpython3.so"
pip3.7 uninstall
-y
protobuf
pip3.7
install
-r
${
PADDLE_ROOT
}
/python/requirements.txt
fi
else
pip uninstall
-y
protobuf
pip
install
-r
${
PADDLE_ROOT
}
/python/requirements.txt
fi
fi
...
...
python/CMakeLists.txt
浏览文件 @
5998d3cc
...
...
@@ -64,6 +64,7 @@ if (WITH_TESTING)
add_subdirectory
(
paddle/dataset/tests
)
add_subdirectory
(
paddle/fluid/tests
)
add_subdirectory
(
paddle/fluid/contrib/tests
)
add_subdirectory
(
paddle/fluid/contrib/slim/tests
)
endif
()
install
(
DIRECTORY
${
PADDLE_PYTHON_PACKAGE_DIR
}
DESTINATION opt/paddle/share/wheels
...
...
python/paddle/fluid/compiler.py
浏览文件 @
5998d3cc
...
...
@@ -177,7 +177,10 @@ class CompiledProgram(object):
# FIXME(dzhwinter): enable_inplace should be after memory_optimize
# if turn on python memory optimize, turn off the inplace_pass.
self
.
_build_strategy
.
enable_inplace
=
False
if
self
.
_program
.
_is_mem_optimized
else
True
if
self
.
_build_strategy
.
memory_optimize
is
None
:
self
.
_build_strategy
.
memory_optimize
=
False
if
main
.
_is_mem_optimized
else
True
if
self
.
_build_strategy
.
enable_inplace
is
None
:
self
.
_build_strategy
.
enable_inplace
=
False
if
main
.
_is_mem_optimized
else
True
if
self
.
_build_strategy
.
num_trainers
>
1
and
trainers_endpoints
:
assert
self
.
_build_strategy
.
num_trainers
==
len
(
...
...
python/paddle/fluid/contrib/int8_inference/README.md
浏览文件 @
5998d3cc
...
...
@@ -63,10 +63,10 @@ Notes:
## 4. How to reproduce the results
*
Small dataset
```
bash
python python/paddle/fluid/contrib/tests/test_calibration.py
FLAGS_use_mkldnn
=
true
python python/paddle/fluid/contrib/tests/test_calibration.py
```
*
Full dataset
```
bash
DATASET
=
full python python/paddle/fluid/contrib/tests/test_calibration.py
FLAGS_use_mkldnn
=
true
DATASET
=
full python python/paddle/fluid/contrib/tests/test_calibration.py
```
python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
浏览文件 @
5998d3cc
...
...
@@ -13,14 +13,19 @@
# limitations under the License.
import
collections
import
numpy
as
np
import
six
from
.....
import
compat
as
cpt
from
....
import
core
from
....framework
import
IrGraph
from
....framework
import
Program
from
....framework
import
Variable
from
....initializer
import
Constant
from
....
import
unique_name
__all__
=
[
'QuantizationTransformPass'
]
__all__
=
[
'QuantizationTransformPass'
,
'QuantizationFreezePass'
,
'ConvertToInt8Pass'
,
'TransformForMobilePass'
]
class
QuantizationTransformPass
(
object
):
...
...
@@ -35,7 +40,13 @@ class QuantizationTransformPass(object):
"""
Convert and rewrite the IrGraph according to weight and
activation quantization type.
Args:
scope(fluid.Scope): When activation use 'range_abs_max' as the quantize
type, this pass will create some new parameters. The scope is used to
initialize these new parameters.
program_exe(fluid.Executor): program_exe is used to initialize new
parameters described above.
weight_bits (int): quantization bit number for weights,
the bias is not quantized.
activation_bits (int): quantization bit number for activation.
...
...
@@ -49,6 +60,7 @@ class QuantizationTransformPass(object):
support 'abs_max'. The 'range_abs_max' usually is not used for
weight, since weights are fixed once the model is well trained.
window_size (int): the window size for 'range_abs_max' quantization.
Examples:
.. code-block:: python
# The original graph will be rewrite.
...
...
@@ -88,31 +100,35 @@ class QuantizationTransformPass(object):
self
.
_quantizable_grad_ops
=
[
'%s_grad'
%
(
op
)
for
op
in
self
.
_quantizable_ops
]
self
.
_fake_quant_op_types
=
[
'fake_quantize_abs_max'
,
'fake_quantize_range_abs_max'
]
self
.
_fake_dequant_op_types
=
[
'fake_dequantize_max_abs'
]
self
.
_is_test
=
None
self
.
_global_step
=
None
def
apply
(
self
,
graph
):
"""
Quantize the graph for training process. According to weight and
activation quantization type, the graph will be added some fake
quantize operators and fake dequantize operators.
Args:
graph(IrGraph): the applied graph.
"""
assert
isinstance
(
graph
,
IrGraph
),
'graph must be the instance of IrGraph.'
self
.
_need_initialized
.
clear
()
self
.
_is_test
=
graph
.
is_test
()
# marked the variable which has been dequantized.
dequantized_vars
=
collections
.
OrderedDict
()
p
arams
=
[
p
.
name
()
for
p
in
graph
.
all_paramete
rs
()]
p
ersistable_vars
=
[
p
.
name
()
for
p
in
graph
.
all_persistable_va
rs
()]
def
_transform_forward
(
graph
,
op
):
for
var_node
in
op
.
inputs
:
if
var_node
.
name
()
in
dequantized_vars
:
dequant_var_node
=
dequantized_vars
[
var_node
.
name
()]
else
:
quant_bits
=
self
.
_weight_bits
if
var_node
.
name
()
in
p
aram
s
\
quant_bits
=
self
.
_weight_bits
if
var_node
.
name
()
in
p
ersistable_var
s
\
else
self
.
_activation_bits
quant_type
=
self
.
_weight_quantize_type
if
var_node
.
name
()
\
in
p
aram
s
else
self
.
_activation_quantize_type
in
p
ersistable_var
s
else
self
.
_activation_quantize_type
quant_var_node
,
scale_var_node
=
self
.
_insert_quant_op
(
graph
,
var_node
,
quant_bits
,
quant_type
)
dequant_var_node
=
self
.
_insert_dequant_op
(
...
...
@@ -150,9 +166,14 @@ class QuantizationTransformPass(object):
assert
self
.
_program_exe
is
not
None
,
\
'The program_exe cannot be set None when activation_quantize_type equals to range_abs_max.'
init_program
=
Program
()
for
var_desc
,
initializer
in
self
.
_need_initialized
.
iteritems
():
var
=
Variable
(
init_program
.
global_block
())
var
.
_set_desc
(
var_desc
)
for
var_desc
,
initializer
in
six
.
iteritems
(
self
.
_need_initialized
):
var
=
init_program
.
global_block
().
create_var
(
name
=
var_desc
.
name
(),
shape
=
var_desc
.
shape
(),
dtype
=
var_desc
.
dtype
(),
type
=
var_desc
.
type
(),
lod_level
=
var_desc
.
lod_level
(),
persistable
=
var_desc
.
persistable
())
initializer
(
var
,
init_program
.
global_block
())
self
.
_program_exe
.
run
(
program
=
init_program
,
scope
=
self
.
_scope
)
...
...
@@ -161,7 +182,7 @@ class QuantizationTransformPass(object):
def
_create_global_step
(
self
,
graph
):
if
self
.
_weight_quantize_type
==
'range_abs_max'
or
\
self
.
_activation_quantize_type
==
'range_abs_max'
:
counter_name
=
'@STEP_COUNTER@'
counter_name
=
cpt
.
to_text
(
'@STEP_COUNTER@'
)
for
node
in
graph
.
all_vars
():
if
node
.
name
()
==
counter_name
:
self
.
_global_step
=
node
...
...
@@ -175,9 +196,14 @@ class QuantizationTransformPass(object):
Constant
(
value
=
0
,
force_cpu
=
True
)
global_step_out
=
graph
.
create_var_node_from_desc
(
global_step_in
.
var
())
# The attribute of `op_role` is needed by ParallelExecutor.
increment_op
=
graph
.
create_op_node
(
op_type
=
'increment'
,
attrs
=
{
'step'
:
1.0
},
attrs
=
{
'step'
:
1.0
,
'op_role'
:
core
.
op_proto_and_checker_maker
.
OpRole
.
Forward
},
inputs
=
{
'X'
:
global_step_in
},
outputs
=
{
'Out'
:
global_step_out
})
graph
.
link_to
(
global_step_in
,
increment_op
)
...
...
@@ -212,7 +238,10 @@ class QuantizationTransformPass(object):
var_dtype
=
var_node
.
var
().
dtype
())
quant_op_node
=
graph
.
create_op_node
(
op_type
=
'fake_quantize_abs_max'
,
attrs
=
{
'bit_length'
:
quant_bits
},
attrs
=
{
'bit_length'
:
quant_bits
,
'op_role'
:
core
.
op_proto_and_checker_maker
.
OpRole
.
Forward
},
inputs
=
{
'X'
:
var_node
},
outputs
=
{
'Out'
:
quant_var_node
,
'OutScale'
:
scale_var_node
})
...
...
@@ -257,7 +286,8 @@ class QuantizationTransformPass(object):
attrs
=
{
'window_size'
:
self
.
_window_size
,
'bit_length'
:
quant_bits
,
'is_test'
:
self
.
_is_test
'is_test'
:
self
.
_is_test
,
'op_role'
:
core
.
op_proto_and_checker_maker
.
OpRole
.
Forward
}
quant_op_node
=
graph
.
create_op_node
(
op_type
=
'fake_quantize_range_abs_max'
,
...
...
@@ -290,7 +320,10 @@ class QuantizationTransformPass(object):
max_range
=
(
1
<<
(
quant_bits
-
1
))
-
1
dequant_op_node
=
graph
.
create_op_node
(
op_type
=
'fake_dequantize_max_abs'
,
attrs
=
{
'max_range'
:
float
(
max_range
)},
attrs
=
{
'max_range'
:
float
(
max_range
),
'op_role'
:
core
.
op_proto_and_checker_maker
.
OpRole
.
Forward
},
inputs
=
{
'X'
:
var_node
,
'Scale'
:
scale_var_node
},
outputs
=
{
'Out'
:
dequant_var_node
})
...
...
@@ -316,3 +349,330 @@ class QuantizationTransformPass(object):
Return the scale name of quantized variable for the input `var_name`.
"""
return
"%s.scale"
%
(
var_name
)
class
QuantizationFreezePass
(
object
):
"""
The freeze pass is used to adjust the quantize operator order, for example:
1) `activation -> quant -> dequant -> conv2d` will be freezed into
`activation -> quant -> conv2d -> dequant`
2) `weight -> quant -> dequant -> conv2d` will be freezed into `weight -> conv2d`,
and weight will be sacled offline.
Args:
scope(fluid.Scope): scope is used to get the weight tensor values.
place(fluid.CPUPlace|fluid.CUDAPlace): place is used to restore the weight tensors.
weight_bits (int): quantization bit number for weights.
activation_bits (int): quantization bit number for activation.
weight_quantize_type (str): quantization type for weights, support 'abs_max'.
The 'range_abs_max' usually is not used for weight, since weights are fixed once the
model is well trained.
"""
def
__init__
(
self
,
scope
,
place
,
weight_bits
=
8
,
activation_bits
=
8
,
weight_quantize_type
=
'abs_max'
):
assert
scope
is
not
None
,
\
'The scope cannot be set None.'
assert
place
is
not
None
,
\
'The place cannot be set None.'
self
.
_scope
=
scope
self
.
_place
=
place
self
.
_weight_bits
=
weight_bits
self
.
_activation_bits
=
activation_bits
self
.
_weight_quantize_type
=
weight_quantize_type
self
.
_quantizable_ops
=
[
'conv2d'
,
'depthwise_conv2d'
,
'mul'
]
self
.
_fake_quant_op_names
=
[
'fake_quantize_abs_max'
,
'fake_quantize_range_abs_max'
]
self
.
_fake_dequant_op_names
=
[
'fake_dequantize_max_abs'
]
self
.
_op_input_rename_map
=
collections
.
OrderedDict
()
self
.
_op_output_rename_map
=
collections
.
OrderedDict
()
self
.
_var_scale_map
=
collections
.
OrderedDict
()
def
apply
(
self
,
graph
):
"""
Adjust quantize/dequantize operators order for the inference process.
Args:
graph(IrGraph): the applied graph.
"""
persistable_vars
=
[
p
.
name
()
for
p
in
graph
.
all_persistable_vars
()]
ops
=
graph
.
all_ops
()
for
op_node
in
ops
:
op_name
=
op_node
.
name
()
if
op_name
in
self
.
_fake_quant_op_names
:
input_arg_name
=
op_node
.
op
().
input
(
'X'
)[
0
]
if
input_arg_name
in
persistable_vars
:
if
self
.
_weight_quantize_type
==
'abs_max'
:
param
=
self
.
_load_var
(
input_arg_name
)
scale_v
=
np
.
max
(
np
.
abs
(
param
))
else
:
scale_v
=
self
.
_load_var
(
op_node
.
op
().
output
(
'OutScale'
)
[
0
])[
0
]
self
.
_var_scale_map
[
input_arg_name
]
=
scale_v
else
:
scale_v
=
graph
.
var_node
(
op_node
.
op
().
output
(
'OutScale'
)[
0
])
self
.
_var_scale_map
[
input_arg_name
]
=
scale_v
if
input_arg_name
in
persistable_vars
:
self
.
_remove_fake_quant_and_dequant_op
(
graph
,
op_node
)
# quantize weight and restore
param_v
=
self
.
_load_var
(
input_arg_name
)
quantized_param_v
=
self
.
_quant
(
param_v
,
scale_v
,
self
.
_weight_bits
)
self
.
_restore_var
(
input_arg_name
,
quantized_param_v
)
ops
=
graph
.
all_ops
()
for
op_node
in
ops
:
op_name
=
op_node
.
name
()
if
op_name
in
self
.
_fake_dequant_op_names
:
self
.
_remove_fake_quant_and_dequant_op
(
graph
,
op_node
)
ops
=
graph
.
all_ops
()
for
op_node
in
ops
:
op_name
=
op_node
.
name
()
if
op_name
in
self
.
_quantizable_ops
:
self
.
_insert_post_dequant_op
(
graph
,
op_node
)
for
op_node
in
ops
:
# insert dequant_op after fc/conv, need to rename inputs of the followed ops
for
var_node
in
op_node
.
inputs
:
name
=
var_node
.
name
()
if
name
in
self
.
_op_output_rename_map
:
old_in
=
graph
.
var_node
(
name
)
new_in
=
self
.
_op_output_rename_map
[
name
]
graph
.
update_input_link
(
old_in
,
new_in
,
op_node
)
# remove the unused var node in the graph
self
.
_remove_unused_var_nodes
(
graph
)
return
graph
def
_remove_fake_quant_and_dequant_op
(
self
,
graph
,
op_node
):
k
=
op_node
.
op
().
output
(
'Out'
)[
0
]
v
=
op_node
.
op
().
input
(
'X'
)[
0
]
if
v
not
in
self
.
_op_input_rename_map
:
self
.
_op_input_rename_map
[
k
]
=
v
else
:
self
.
_op_input_rename_map
[
k
]
=
self
.
_op_input_rename_map
[
v
]
graph
.
safe_remove_nodes
(
op_node
)
def
_insert_post_dequant_op
(
self
,
graph
,
op_node
):
max_range
=
None
scale_var_node
=
None
persistable_vars
=
[
p
.
name
()
for
p
in
graph
.
all_persistable_vars
()]
for
var_node
in
op_node
.
inputs
:
name
=
var_node
.
name
()
if
name
in
self
.
_op_input_rename_map
:
old_in
=
graph
.
var_node
(
name
)
new_in
=
graph
.
var_node
(
self
.
_op_input_rename_map
[
name
])
new_in
.
clear_outputs
()
graph
.
update_input_link
(
old_in
,
new_in
,
op_node
)
original_var_name
=
self
.
_original_var_name
(
name
)
scale_v
=
self
.
_var_scale_map
[
original_var_name
]
if
original_var_name
in
persistable_vars
:
param_range
=
(
1
<<
(
self
.
_weight_bits
-
1
))
-
1
act_range
=
(
1
<<
(
self
.
_activation_bits
-
1
))
-
1
assert
self
.
_is_float
(
scale_v
),
'The scale of parameter %s is not a float.'
%
(
original_var_name
)
max_range
=
param_range
*
act_range
/
scale_v
else
:
assert
isinstance
(
scale_v
,
core
.
Node
)
scale_var_node
=
self
.
_var_scale_map
[
original_var_name
]
if
len
(
op_node
.
outputs
)
!=
1
:
raise
ValueError
(
"Only support one output, but op %s has"
" more than one output."
%
(
op_node
.
name
()))
output_var_node
=
op_node
.
outputs
[
0
]
dequant_var_node
=
graph
.
create_var_node
(
name
=
self
.
_dequantized_var_name
(
output_var_node
.
name
()),
var_type
=
output_var_node
.
var
().
type
(),
shape
=
output_var_node
.
var
().
shape
(),
var_dtype
=
output_var_node
.
var
().
dtype
())
dequant_op_node
=
graph
.
create_op_node
(
op_type
=
'fake_dequantize_max_abs'
,
attrs
=
{
'max_range'
:
float
(
max_range
),
'op_role'
:
core
.
op_proto_and_checker_maker
.
OpRole
.
Forward
},
inputs
=
{
'X'
:
output_var_node
,
'Scale'
:
scale_var_node
},
outputs
=
{
'Out'
:
dequant_var_node
})
graph
.
link_to
(
output_var_node
,
dequant_op_node
)
graph
.
link_to
(
scale_var_node
,
dequant_op_node
)
graph
.
link_to
(
dequant_op_node
,
dequant_var_node
)
self
.
_op_output_rename_map
[
output_var_node
.
name
()]
=
dequant_var_node
return
dequant_var_node
def
_load_var
(
self
,
name
):
return
np
.
array
(
self
.
_scope
.
find_var
(
name
).
get_tensor
())
def
_restore_var
(
self
,
name
,
array
):
tensor
=
self
.
_scope
.
find_var
(
name
).
get_tensor
()
tensor
.
set
(
array
,
self
.
_place
)
def
_remove_unused_var_nodes
(
self
,
graph
):
all_used_vars
=
set
()
ops
=
graph
.
all_ops
()
for
op_node
in
ops
:
for
input_node
in
op_node
.
inputs
:
all_used_vars
.
add
(
input_node
)
for
output_node
in
op_node
.
outputs
:
all_used_vars
.
add
(
output_node
)
all_unused_vars
=
graph
.
all_vars
()
-
all_used_vars
graph
.
safe_remove_nodes
(
all_unused_vars
)
def
_original_var_name
(
self
,
var_name
):
"""
Return the original variable name.
"""
if
var_name
.
endswith
(
'.quantized.dequantized'
):
return
var_name
[:
-
len
(
'.quantized.dequantized'
)]
if
var_name
.
endswith
(
'.quantized'
):
return
var_name
[:
-
len
(
'.quantized'
)]
if
var_name
.
endswith
(
'.dequantized'
):
return
var_name
[:
-
len
(
'.dequantized'
)]
if
var_name
.
endswith
(
'.scale'
):
return
var_name
[:
-
len
(
'.scale'
)]
else
:
return
var_name
def
_dequantized_var_name
(
self
,
var_name
):
"""
Return dequantized variable name for the input `var_name`.
"""
return
"%s.dequantized"
%
(
var_name
)
def
_is_float
(
self
,
v
):
return
isinstance
(
v
,
float
)
or
isinstance
(
v
,
np
.
float32
)
\
or
isinstance
(
v
,
np
.
float64
)
def
_quant
(
self
,
x
,
scale
,
num_bits
):
return
np
.
round
(
x
/
scale
*
((
1
<<
(
num_bits
-
1
))
-
1
))
class
ConvertToInt8Pass
(
object
):
"""
Convert the weights into int8_t type.
Args:
scope(fluid.Scope): scope is used to get the weight tensor values.
place(fluid.CPUPlace|fluid.CUDAPlace): place is used to restore the
8bits weight tensors.
"""
def
__init__
(
self
,
scope
,
place
):
assert
scope
is
not
None
,
\
'The scope cannot be set None.'
assert
place
is
not
None
,
\
'The place cannot be set None.'
self
.
_scope
=
scope
self
.
_place
=
place
self
.
_quantizable_ops
=
[
'conv2d'
,
'depthwise_conv2d'
,
'mul'
]
def
apply
(
self
,
graph
):
"""
Convert weights' tpye of the graph. After that, the data type of the
graph weigths is int8_t.
Args:
graph(IrGraph): the applied graph.
"""
persistable_vars
=
[
p
.
name
()
for
p
in
graph
.
all_persistable_vars
()]
ops
=
graph
.
all_ops
()
input_map
=
{}
for
op_node
in
ops
:
op_name
=
op_node
.
name
()
if
op_name
in
self
.
_quantizable_ops
:
for
var_node
in
op_node
.
inputs
:
name
=
var_node
.
name
()
if
name
in
persistable_vars
:
if
name
not
in
input_map
:
int8_var_node
=
self
.
_convert_to_int8
(
graph
,
var_node
)
input_map
[
name
]
=
int8_var_node
graph
.
update_input_link
(
var_node
,
input_map
[
name
],
op_node
)
# remove the unused var node in the graph
self
.
_remove_unused_var_nodes
(
graph
)
return
graph
def
_convert_to_int8
(
self
,
graph
,
var_node
):
int8_var_node_name
=
var_node
.
name
()
+
".int8"
int8_var_node
=
graph
.
create_param_node
(
name
=
cpt
.
to_text
(
int8_var_node_name
),
var_type
=
var_node
.
var
().
type
(),
shape
=
var_node
.
var
().
shape
(),
var_dtype
=
core
.
VarDesc
.
VarType
.
INT8
)
array
=
self
.
_load_var
(
var_node
.
name
())
self
.
_scope
.
var
(
int8_var_node_name
)
self
.
_store_var
(
int8_var_node_name
,
array
,
np
.
int8
)
return
int8_var_node
def
_load_var
(
self
,
name
):
return
np
.
array
(
self
.
_scope
.
find_var
(
name
).
get_tensor
())
def
_store_var
(
self
,
name
,
array
,
dtype
):
tensor
=
self
.
_scope
.
find_var
(
name
).
get_tensor
()
tensor
.
set
(
array
.
astype
(
dtype
),
self
.
_place
)
def
_remove_unused_var_nodes
(
self
,
graph
):
all_used_vars
=
set
()
ops
=
graph
.
all_ops
()
for
op_node
in
ops
:
for
input_node
in
op_node
.
inputs
:
all_used_vars
.
add
(
input_node
)
for
output_node
in
op_node
.
outputs
:
all_used_vars
.
add
(
output_node
)
all_unused_vars
=
graph
.
all_vars
()
-
all_used_vars
graph
.
safe_remove_nodes
(
all_unused_vars
)
class
TransformForMobilePass
(
object
):
"""
This pass is used to convert the freezed graph for paddle-mobile execution.
"""
def
__init__
(
self
):
self
.
_fake_quant_op_names
=
[
'fake_quantize_abs_max'
,
'fake_quantize_range_abs_max'
]
self
.
_fake_dequant_op_names
=
[
'fake_dequantize_max_abs'
]
def
apply
(
self
,
graph
):
"""
Because paddle-mobile use `quantize` an `dequantize` as the names of
quantize operator and dequantize operator, the `apply` function just
realize this logic.
Args:
graph(IrGraph): the graph will be transformed.
"""
ops
=
graph
.
all_ops
()
for
op_node
in
ops
:
name
=
op_node
.
name
()
if
name
in
self
.
_fake_quant_op_names
:
op_node
.
op
().
set_type
(
'quantize'
)
quant_node
=
graph
.
create_op_node_from_desc
(
op_node
.
op
())
for
input_node
in
op_node
.
inputs
:
graph
.
link_to
(
input_node
,
quant_node
)
for
output_node
in
op_node
.
outputs
:
graph
.
link_to
(
quant_node
,
output_node
)
graph
.
safe_remove_nodes
(
op_node
)
if
name
in
self
.
_fake_dequant_op_names
:
op_node
.
op
().
set_type
(
'dequantize'
)
dequant_node
=
graph
.
create_op_node_from_desc
(
op_node
.
op
())
for
input_node
in
op_node
.
inputs
:
graph
.
link_to
(
input_node
,
dequant_node
)
for
output_node
in
op_node
.
outputs
:
graph
.
link_to
(
dequant_node
,
output_node
)
graph
.
safe_remove_nodes
(
op_node
)
return
graph
python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
0 → 100644
浏览文件 @
5998d3cc
file
(
GLOB TEST_OPS RELATIVE
"
${
CMAKE_CURRENT_SOURCE_DIR
}
"
"test_*.py"
)
string
(
REPLACE
".py"
""
TEST_OPS
"
${
TEST_OPS
}
"
)
foreach
(
src
${
TEST_OPS
}
)
py_test
(
${
src
}
SRCS
${
src
}
.py
)
endforeach
()
python/paddle/fluid/contrib/slim/
unitest
/__init__.py
→
python/paddle/fluid/contrib/slim/
tests
/__init__.py
浏览文件 @
5998d3cc
文件已移动
python/paddle/fluid/contrib/slim/
unitest
/configs/config.yaml
→
python/paddle/fluid/contrib/slim/
tests
/configs/config.yaml
浏览文件 @
5998d3cc
version
:
1.0
include
:
[
"
./
unitest/configs/pruners.yaml"
,
"
./unitest
/configs/pruners_0.yaml"
]
include
:
[
"
./
configs/pruners.yaml"
,
"
.
/configs/pruners_0.yaml"
]
pruners
:
pruner_1
:
class
:
'
RatioPruner'
...
...
python/paddle/fluid/contrib/slim/
unitest
/configs/pruners.yaml
→
python/paddle/fluid/contrib/slim/
tests
/configs/pruners.yaml
浏览文件 @
5998d3cc
文件已移动
python/paddle/fluid/contrib/slim/
unitest
/configs/pruners_0.yaml
→
python/paddle/fluid/contrib/slim/
tests
/configs/pruners_0.yaml
浏览文件 @
5998d3cc
文件已移动
python/paddle/fluid/contrib/slim/
unitest
/test_factory.py
→
python/paddle/fluid/contrib/slim/
tests
/test_factory.py
浏览文件 @
5998d3cc
...
...
@@ -18,7 +18,7 @@ import unittest
class
TestFactory
(
unittest
.
TestCase
):
def
test_parse
(
self
):
factory
=
ConfigFactory
(
'./
unitest/
configs/config.yaml'
)
factory
=
ConfigFactory
(
'./configs/config.yaml'
)
pruner
=
factory
.
instance
(
'pruner_1'
)
self
.
assertEquals
(
pruner
.
ratios
[
'conv1_1.w'
],
0.3
)
...
...
python/paddle/fluid/contrib/slim/tests/test_graph.py
0 → 100644
浏览文件 @
5998d3cc
# copyright (c) 2018 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
from
__future__
import
print_function
import
unittest
import
paddle.fluid
as
fluid
import
six
from
paddle.fluid.framework
import
IrGraph
from
paddle.fluid
import
core
def residual_block(num):
    """
    Build a small residual network in the current fluid program.

    Each of the `num` stages adds a 3x3 conv + batch-norm branch to a
    1x1 conv + batch-norm shortcut and applies ReLU to the sum. The result
    feeds a 10-way fc layer, cross entropy against the 'label' input and a
    mean reduction.

    Args:
        num(int): number of residual stages to stack.

    Returns:
        The variable holding the mean cross-entropy loss.
    """

    def conv_bn_layer(input,
                      ch_out,
                      filter_size,
                      stride,
                      padding,
                      act='relu',
                      bias_attr=False):
        # Convolution without activation; the activation (if any) is applied
        # by the batch-norm layer that follows.
        conv_out = fluid.layers.conv2d(
            input=input,
            filter_size=filter_size,
            num_filters=ch_out,
            stride=stride,
            padding=padding,
            act=None,
            bias_attr=bias_attr)
        return fluid.layers.batch_norm(input=conv_out, act=act)

    image = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
    target = fluid.layers.data(name='label', shape=[1], dtype='int64')

    features = image
    for _ in six.moves.xrange(num):
        main_branch = conv_bn_layer(
            features, 16, 3, 1, 1, act=None, bias_attr=True)
        shortcut = conv_bn_layer(features, 16, 1, 1, 0, act=None)
        features = fluid.layers.elementwise_add(
            x=main_branch, y=shortcut, act='relu')

    logits = fluid.layers.fc(input=features, size=10)
    per_sample_loss = fluid.layers.cross_entropy(input=logits, label=target)
    return fluid.layers.mean(per_sample_loss)
class TestGraph(unittest.TestCase):
    """Exercises the graph-level helper methods of IrGraph."""

    def test_graph_functions(self):
        """Build a two-stage residual net and check the IrGraph utilities."""
        train_prog = fluid.Program()
        init_prog = fluid.Program()
        with fluid.program_guard(train_prog, init_prog):
            avg_loss = residual_block(2)
            optimizer = fluid.optimizer.Adam(learning_rate=0.001)
            optimizer.minimize(avg_loss)
        ir_graph = IrGraph(core.Graph(train_prog.desc), for_test=False)

        # Highlight every operator whose name contains 'conv2d' in the drawing.
        conv_ops = {
            node
            for node in ir_graph.all_ops() if 'conv2d' in node.name()
        }
        ir_graph.draw('.', 'residual', conv_ops)

        self.assertFalse(ir_graph.has_circle())
        self.assertEqual(ir_graph.graph_num(), 1)

        sorted_nodes = ir_graph.topology_sort()
        self.assertEqual(len(sorted_nodes), len(ir_graph.all_ops()))

        adjacency = ir_graph.build_adjacency_list()
        self.assertEqual(len(adjacency), len(ir_graph.all_ops()))

        # Removing the marked operators must shrink the node count by exactly
        # the number of removed nodes.
        total_before = len(ir_graph.all_nodes())
        ir_graph.safe_remove_nodes(conv_ops)
        self.assertEqual(
            len(ir_graph.all_nodes()), total_before - len(conv_ops))
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/contrib/slim/
unitest
/test_quantization_pass.py
→
python/paddle/fluid/contrib/slim/
tests
/test_quantization_pass.py
浏览文件 @
5998d3cc
...
...
@@ -17,9 +17,12 @@ import random
import
numpy
as
np
import
paddle.fluid
as
fluid
import
six
from
paddle.fluid.framework
import
Program
import
paddle
from
paddle.fluid.framework
import
IrGraph
from
paddle.fluid.contrib.slim.quantization
import
QuantizationTransformPass
from
paddle.fluid.contrib.slim.quantization
import
QuantizationFreezePass
from
paddle.fluid.contrib.slim.quantization
import
ConvertToInt8Pass
from
paddle.fluid.contrib.slim.quantization
import
TransformForMobilePass
from
paddle.fluid
import
core
...
...
@@ -65,6 +68,28 @@ def residual_block(num):
return
loss
def conv_net(img, label):
    """
    Build a two-stage conv/pool classifier and return its mean loss.

    The network is conv-pool (20 filters) -> batch norm -> conv-pool
    (50 filters) -> 10-way softmax fc, followed by cross entropy against
    `label` and a mean reduction.

    Args:
        img: the input image variable.
        label: the label variable fed to cross entropy.

    Returns:
        The variable holding the mean cross-entropy loss.
    """
    stage1 = fluid.nets.simple_img_conv_pool(
        input=img,
        filter_size=5,
        num_filters=20,
        pool_size=2,
        pool_stride=2,
        act="relu")
    stage1_bn = fluid.layers.batch_norm(stage1)
    stage2 = fluid.nets.simple_img_conv_pool(
        input=stage1_bn,
        filter_size=5,
        num_filters=50,
        pool_size=2,
        pool_stride=2,
        act="relu")
    probs = fluid.layers.fc(input=stage2, size=10, act='softmax')
    per_sample_loss = fluid.layers.cross_entropy(input=probs, label=label)
    return fluid.layers.mean(per_sample_loss)
class
TestQuantizationTransformPass
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
quantizable_op_and_inputs
=
{
...
...
@@ -171,5 +196,177 @@ class TestQuantizationTransformPass(unittest.TestCase):
self
.
residual_block_quant
(
'range_abs_max'
)
class TestQuantizationFreezePass(unittest.TestCase):
    """
    End-to-end test of the quantization pass pipeline on a small conv net:
    transform -> short training run -> freeze -> convert to int8 ->
    transform for mobile, saving inference models along the way.
    """

    def freeze_graph(self, use_cuda, seed, quant_type):
        """
        Run the whole quantization pipeline once.

        Args:
            use_cuda(bool): run on CUDAPlace(0) when True, otherwise on CPU.
            seed(int): random seed assigned to the main and startup programs.
            quant_type(str): forwarded to QuantizationTransformPass as
                `activation_quantize_type`; the tests in this class use
                'abs_max' and 'range_abs_max'.
        """

        def build_program(main, startup, is_test):
            # Builds conv_net inside `main`; the optimizer is only attached
            # when building the training program.
            main.random_seed = seed
            startup.random_seed = seed
            with fluid.unique_name.guard():
                with fluid.program_guard(main, startup):
                    img = fluid.layers.data(
                        name='image', shape=[1, 28, 28], dtype='float32')
                    label = fluid.layers.data(
                        name='label', shape=[1], dtype='int64')
                    loss = conv_net(img, label)
                    if not is_test:
                        opt = fluid.optimizer.Adam(learning_rate=0.001)
                        opt.minimize(loss)
            return [img, label], loss

        random.seed(0)
        np.random.seed(0)

        main = fluid.Program()
        startup = fluid.Program()
        test_program = fluid.Program()
        feeds, loss = build_program(main, startup, False)
        build_program(test_program, startup, True)
        test_program = test_program.clone(for_test=True)
        main_graph = IrGraph(core.Graph(main.desc), for_test=False)
        test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)

        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
        exe = fluid.Executor(place)
        scope = fluid.Scope()
        with fluid.scope_guard(scope):
            exe.run(startup)
        # The same transform pass instance is applied to both the training
        # graph and the test graph.
        transform_pass = QuantizationTransformPass(
            scope=scope, program_exe=exe, activation_quantize_type=quant_type)
        transform_pass.apply(main_graph)
        transform_pass.apply(test_graph)
        dev_name = '_gpu_' if use_cuda else '_cpu_'
        # Draw both graphs into the current directory, marking every op whose
        # name contains 'quantize'.
        marked_nodes = set()
        for op in main_graph.all_ops():
            if op.name().find('quantize') > -1:
                marked_nodes.add(op)
        main_graph.draw('.', 'main' + dev_name + quant_type, marked_nodes)
        marked_nodes = set()
        for op in test_graph.all_ops():
            if op.name().find('quantize') > -1:
                marked_nodes.add(op)
        test_graph.draw('.', 'test' + dev_name + quant_type, marked_nodes)

        quantized_main_program = main_graph.to_program()
        quantized_test_program = test_graph.to_program()
        iters = 5
        batch_size = 8

        #train_exe = fluid.ParallelExecutor(
        #    main_program=quantized_main_program,
        #    use_cuda=bool(use_cuda),
        #    loss_name=loss.name,
        #    scope=scope)
        train_reader = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.mnist.train(), buf_size=500),
            batch_size=batch_size)
        test_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=batch_size)
        feeder = fluid.DataFeeder(feed_list=feeds, place=place)
        with fluid.scope_guard(scope):
            for _ in range(iters):
                # A fresh reader generator is created on every iteration and
                # only its first batch is consumed.
                data = next(train_reader())
                loss_v = exe.run(program=quantized_main_program,
                                 feed=feeder.feed(data),
                                 fetch_list=[loss])
                #loss_v = train_exe.run(feed=feeder.feed(data),
                #                       fetch_list=[loss.name])
                #print('{}: {}'.format('loss' + dev_name + quant_type, loss_v))

        test_data = next(test_reader())
        with fluid.program_guard(quantized_test_program):
            w_var = fluid.framework._get_var('conv2d_1.w_0.quantized',
                                             quantized_test_program)
        # Testing
        with fluid.scope_guard(scope):
            test_loss1, w_quant = exe.run(program=quantized_test_program,
                                          feed=feeder.feed(test_data),
                                          fetch_list=[loss, w_var])

        # Freeze graph for inference, but the weight of fc/conv is still float type.
        freeze_pass = QuantizationFreezePass(scope=scope, place=place)
        freeze_pass.apply(test_graph)
        marked_nodes = set()
        for op in test_graph.all_ops():
            if op.name().find('quantize') > -1:
                marked_nodes.add(op)
        test_graph.draw('.', 'test_freeze' + dev_name + quant_type,
                        marked_nodes)

        server_program = test_graph.to_program()
        with fluid.scope_guard(scope):
            test_loss2, = exe.run(program=server_program,
                                  feed=feeder.feed(test_data),
                                  fetch_list=[loss])
        # The loss on the same batch before and after freezing must agree to
        # within 5e-3.
        self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3)
        #print('{}: {}'.format('test_loss1' + dev_name + quant_type, test_loss1))
        #print('{}: {}'.format('test_loss2' + dev_name + quant_type, test_loss2))
        w_freeze = np.array(scope.find_var('conv2d_1.w_0').get_tensor())
        # Maybe failed, this is due to the calculation precision
        # self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant))
        #print('{}: {}'.format('w_freeze' + dev_name + quant_type,
        #                      np.sum(w_freeze)))
        #print('{}: {}'.format('w_quant' + dev_name + quant_type,
        #                      np.sum(w_quant)))

        # Convert parameter to 8-bit.
        convert_int8_pass = ConvertToInt8Pass(scope=scope, place=place)
        convert_int8_pass.apply(test_graph)
        marked_nodes = set()
        for op in test_graph.all_ops():
            if op.name().find('quantize') > -1:
                marked_nodes.add(op)
        test_graph.draw('.', 'test_int8' + dev_name + quant_type, marked_nodes)
        server_program_int8 = test_graph.to_program()
        # Save the 8-bit parameter and model file.
        with fluid.scope_guard(scope):
            fluid.io.save_inference_model('server_int8' + dev_name + quant_type,
                                          ['image', 'label'], [loss], exe,
                                          server_program_int8)
            # Test whether the 8-bit parameter and model file can be loaded successfully.
            [infer, feed, fetch] = fluid.io.load_inference_model(
                'server_int8' + dev_name + quant_type, exe)
        # Check the loaded 8-bit weight.
        w_8bit = np.array(scope.find_var('conv2d_1.w_0.int8').get_tensor())
        self.assertEqual(w_8bit.dtype, np.int8)
        self.assertEqual(np.sum(w_8bit), np.sum(w_freeze))
        #print('{}: {}'.format('w_8bit' + dev_name + quant_type, np.sum(w_8bit)))
        #print('{}: {}'.format('w_freeze' + dev_name + quant_type,
        #                      np.sum(w_freeze)))

        # Rewrite the graph for mobile deployment and save that model too.
        mobile_pass = TransformForMobilePass()
        mobile_pass.apply(test_graph)
        marked_nodes = set()
        for op in test_graph.all_ops():
            if op.name().find('quantize') > -1:
                marked_nodes.add(op)
        test_graph.draw('.', 'test_mobile' + dev_name + quant_type,
                        marked_nodes)

        mobile_program = test_graph.to_program()
        with fluid.scope_guard(scope):
            fluid.io.save_inference_model('mobile_int8' + dev_name + quant_type,
                                          ['image', 'label'], [loss], exe,
                                          mobile_program)

    def test_freeze_graph_cuda_dynamic(self):
        """'abs_max' quantization on GPU; a no-op when built without CUDA."""
        if fluid.core.is_compiled_with_cuda():
            with fluid.unique_name.guard():
                self.freeze_graph(True, seed=1, quant_type='abs_max')

    def test_freeze_graph_cpu_dynamic(self):
        """'abs_max' quantization on CPU."""
        with fluid.unique_name.guard():
            self.freeze_graph(False, seed=2, quant_type='abs_max')

    def test_freeze_graph_cuda_static(self):
        """'range_abs_max' quantization on GPU; a no-op when built without CUDA."""
        if fluid.core.is_compiled_with_cuda():
            with fluid.unique_name.guard():
                self.freeze_graph(True, seed=1, quant_type='range_abs_max')

    def test_freeze_graph_cpu_static(self):
        """'range_abs_max' quantization on CPU."""
        with fluid.unique_name.guard():
            self.freeze_graph(False, seed=2, quant_type='range_abs_max')
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/contrib/tests/CMakeLists.txt
浏览文件 @
5998d3cc
...
...
@@ -6,5 +6,9 @@ if(APPLE OR WIN32 OR NOT WITH_MKL)
endif
()
# Register every entry of TEST_OPS as a Python test.
foreach(src ${TEST_OPS})
    if(src MATCHES "test_calibration")
        # Calibration tests get FLAGS_use_mkldnn=true through the test
        # environment (ENVS) instead of setting it inside the test itself.
        py_test(${src} SRCS ${src}.py ENVS FLAGS_use_mkldnn=true)
    else()
        py_test(${src} SRCS ${src}.py)
    endif()
endforeach()
python/paddle/fluid/contrib/tests/test_calibration.py
浏览文件 @
5998d3cc
...
...
@@ -199,7 +199,6 @@ class TestCalibrationForResnet50(unittest.TestCase):
def
run_program
(
self
,
model_path
,
generate_int8
=
False
,
algo
=
'direct'
):
image_shape
=
[
3
,
224
,
224
]
os
.
environ
[
'FLAGS_use_mkldnn'
]
=
'True'
fluid
.
memory_optimize
(
fluid
.
default_main_program
())
...
...
@@ -241,9 +240,6 @@ class TestCalibrationForResnet50(unittest.TestCase):
label
=
label
.
reshape
([
-
1
,
1
])
running_program
=
calibrator
.
sampling_program
.
clone
(
)
if
generate_int8
else
infer_program
.
clone
()
for
op
in
running_program
.
current_block
().
ops
:
if
op
.
has_attr
(
"use_mkldnn"
):
op
.
_set_attr
(
"use_mkldnn"
,
True
)
t1
=
time
.
time
()
_
,
acc1
,
_
=
exe
.
run
(
...
...
python/paddle/fluid/contrib/tests/test_quantize_transpiler.py
浏览文件 @
5998d3cc
...
...
@@ -204,9 +204,11 @@ class TestQuantizeTranspiler(unittest.TestCase):
build_program
(
test_program
,
startup
,
True
)
test_program
=
test_program
.
clone
(
for_test
=
True
)
quant_transpiler
=
QuantizeTranspiler
()
quant_transpiler
.
training_transpile
(
main
)
quant_transpiler
.
training_transpile
(
test_program
)
quant_type
=
'range_abs_max'
# 'range_abs_max' or 'abs_max'
quant_transpiler
=
QuantizeTranspiler
(
activation_quantize_type
=
quant_type
)
quant_transpiler
.
training_transpile
(
main
,
startup
)
quant_transpiler
.
training_transpile
(
test_program
,
startup
)
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
...
...
python/paddle/fluid/framework.py
浏览文件 @
5998d3cc
...
...
@@ -16,6 +16,8 @@ from __future__ import print_function
import
collections
from
collections
import
defaultdict
from
collections
import
Iterable
import
contextlib
from
.wrapped_decorator
import
signature_safe_contextmanager
import
os
import
re
...
...
@@ -555,7 +557,8 @@ class OpProtoHolder(object):
return
{
core
.
op_proto_and_checker_maker
.
kOpRoleAttrName
(),
core
.
op_proto_and_checker_maker
.
kOpRoleVarAttrName
(),
core
.
op_proto_and_checker_maker
.
kOpNameScopeAttrName
()
core
.
op_proto_and_checker_maker
.
kOpNameScopeAttrName
(),
core
.
op_proto_and_checker_maker
.
kOpCreationCallstackAttrName
()
}
...
...
@@ -1529,12 +1532,16 @@ class Block(object):
class
IrGraph
(
object
):
"""
IrGraph uses core.Graph as the delegation to accomplish the manipulation.
Python IrGraph. Beneath it is a core.Graph, which is used for
create a c++ Ir Pass Graph. An IrGraph is just a graph view of
a Program. In an IrGraph, both Variables and Operators are graph
nodes.
"""
def
__init__
(
self
,
graph
,
for_test
=
False
):
"""
Construct the IrGraph using core.Graph.
Construct an IrGraph using core.Graph.
Args:
graph(core.Graph): C++ Graph.
for_test(bool): True for the test graph and false for the train graph.
...
...
@@ -1545,23 +1552,81 @@ class IrGraph(object):
self
.
_for_test
=
for_test
def is_test(self):
    """
    Tell whether this graph was built for testing.

    Returns:
        The `for_test` value stored on this IrGraph: true for a test graph,
        false for a train graph.
    """
    for_test = self._for_test
    return for_test
def all_parameters(self):
    """
    Return, as a set, every variable node in the graph that has a variable
    description marked persistable.
    """
    return {
        candidate
        for candidate in self.graph.nodes()
        if candidate.is_var() and candidate.var() is not None and
        candidate.var().persistable()
    }
def all_nodes(self):
    """
    Collect every node of the underlying graph into a new set.

    Returns:
        set: all nodes reported by the wrapped graph.
    """
    return set(self.graph.nodes())
def all_vars(self):
    """
    Collect every variable node of the graph.

    Returns:
        set: the nodes for which `is_var()` is true.
    """
    var_nodes = set()
    for candidate in self.graph.nodes():
        if candidate.is_var():
            var_nodes.add(candidate)
    return var_nodes
def all_persistable_vars(self):
    """
    Collect every persistable variable node of the graph.

    A node qualifies when it is a variable node, carries a variable
    description, and that description is marked persistable.

    Returns:
        set: the qualifying nodes.
    """
    return {
        candidate
        for candidate in self.graph.nodes()
        if candidate.is_var() and candidate.var() is not None and
        candidate.var().persistable()
    }
def all_ops(self):
    """
    Collect every operator node of the graph.

    Returns:
        set: the nodes for which `is_op()` is true.
    """
    op_nodes = set()
    for candidate in self.graph.nodes():
        if candidate.is_op():
            op_nodes.add(candidate)
    return op_nodes
def var_node(self, name):
    """
    Look up a variable node by name.

    Args:
        name(str): the name of the variable node.

    Raises:
        TypeError: if `name` is not a string.
        ValueError: if the graph has no variable node with that name.

    Returns:
        core.Node: the variable node with the given name.
    """
    if not isinstance(name, six.string_types):
        raise TypeError(
            "var require string as parameter, but get %s instead." %
            (type(name)))
    matches = [
        candidate for candidate in self.all_vars() if candidate.name() == name
    ]
    if not matches:
        raise ValueError("var_node %s not in this graph" % name)
    # When several nodes share the name, the last one visited is returned.
    return matches[-1]
def
create_param_node
(
self
,
name
,
var_type
,
shape
,
var_dtype
):
"""
Create a persistable variable node in the graph. In IrGraph,
it can not distinguish between persistable variables and parameters.
Args:
name(str): the name of the persistable variable node.
vart_type(core.VarDesc.VarType): the type of the persistable variable node.
shape(list): the shape of the persistable variable node.
var_dtype(core.VarDesc.VarType): the data type of the persistable variable node.
Returns:
core.Node: the created persistable variable node.
"""
var_desc
=
core
.
VarDesc
(
name
)
var_desc
.
set_type
(
var_type
)
var_desc
.
set_shape
(
shape
)
...
...
@@ -1570,6 +1635,20 @@ class IrGraph(object):
return
self
.
graph
.
create_var_node
(
var_desc
)
def
create_var_node
(
self
,
name
,
var_type
,
shape
,
var_dtype
):
"""
Create a variable node in the graph. The created variable node is
not persistable.
Args:
name(str): the name of the variable node.
vart_type(core.VarDesc.VarType): the type of the variable node.
shape(list): the shape of the variable node.
var_dtype(core.VarDesc.VarType): the data type of the variable node.
Returns:
core.Node: the created variable node.
"""
var_desc
=
core
.
VarDesc
(
name
)
var_desc
.
set_type
(
var_type
)
var_desc
.
set_shape
(
shape
)
...
...
@@ -1577,19 +1656,41 @@ class IrGraph(object):
return
self
.
graph
.
create_var_node
(
var_desc
)
def create_var_node_from_desc(self, var_desc):
    """
    Add a variable node to the graph from an existing VarDesc.

    Whether the new node is persistable depends on the given description.

    Args:
        var_desc(core.VarDesc): the variable description to use.

    Returns:
        core.Node: the created variable node.
    """
    new_node = self.graph.create_var_node(var_desc)
    return new_node
def
create_op_node
(
self
,
op_type
,
attrs
,
inputs
,
outputs
):
"""
Create a operator node in the graph.
Args:
op_type(str): the type of the operator node.
attrs(dict): the attributes of the operator node.
inputs(dict): the inputs of the operator node.
outputs(dict): the outpus of the operator node.
Returns:
core.Node: the created operator node.
"""
op_desc
=
core
.
OpDesc
()
op_desc
.
set_type
(
op_type
)
for
attr
,
value
in
attrs
.
iteritems
(
):
for
attr
,
value
in
six
.
iteritems
(
attrs
):
self
.
_update_desc_attr
(
op_desc
,
attr
,
value
)
for
input_name
,
var_nodes
in
inputs
.
iteritems
(
):
for
input_name
,
var_nodes
in
six
.
iteritems
(
inputs
):
if
not
isinstance
(
var_nodes
,
list
):
var_nodes
=
[
var_nodes
]
op_desc
.
set_input
(
input_name
,
[
var_node
.
name
()
for
var_node
in
var_nodes
])
for
output_name
,
var_nodes
in
outputs
.
iteritems
(
):
for
output_name
,
var_nodes
in
six
.
iteritems
(
outputs
):
if
not
isinstance
(
var_nodes
,
list
):
var_nodes
=
[
var_nodes
]
op_desc
.
set_output
(
output_name
,
...
...
@@ -1597,11 +1698,29 @@ class IrGraph(object):
return
self
.
graph
.
create_op_node
(
op_desc
)
def create_op_node_from_desc(self, op_desc):
    """
    Add an operator node to the graph from an existing operator description.

    Args:
        op_desc(core.OpDesc): the operator description to use.

    Returns:
        core.Node: the created operator node.
    """
    new_node = self.graph.create_op_node(op_desc)
    return new_node
def
update_input_link
(
self
,
old_input_node
,
new_input_node
,
op_node
):
assert
old_input_node
in
self
.
graph
.
nodes
()
and
new_input_node
in
self
.
graph
.
nodes
()
and
\
op_node
in
self
.
graph
.
nodes
(),
'Th three arguments must be in the graph nodes.'
"""
Update the input's link of a operator node.
Args:
old_input_node(core.Node): the old input node of the giving op_node.
new_input_node(core.Node): the new input node of the giving op_node.
op_node(core.Node): the operator node that is needed to update input's link.
"""
assert
old_input_node
in
self
.
graph
.
nodes
()
and
new_input_node
in
\
self
.
graph
.
nodes
()
and
op_node
in
self
.
graph
.
nodes
(),
\
'The three arguments(old_input_node&new_input_node&op_node) must be in the graph nodes.'
old_input_node
.
outputs_remove
(
op_node
)
op_node
.
inputs_remove
(
old_input_node
)
new_input_node
.
outputs_append
(
op_node
)
...
...
@@ -1609,17 +1728,85 @@ class IrGraph(object):
op_node
.
op
().
_rename_input
(
old_input_node
.
name
(),
new_input_node
.
name
())
def link_to(self, node_in, node_out):
    """
    Connect two nodes that already belong to this graph.

    `node_out` is appended to the outputs of `node_in`, and `node_in` is
    appended to the inputs of `node_out`.

    Args:
        node_in(core.Node): the input node.
        node_out(core.Node): the output node.

    Raises:
        AssertionError: if either node is not among the graph's nodes.
    """
    # Fetch the node collection once instead of once per membership test.
    graph_nodes = self.graph.nodes()
    # A single message: the stale, typo'd variant used to be concatenated
    # in front of this one by adjacent-string-literal joining.
    assert node_in in graph_nodes and node_out in graph_nodes, \
        'The two arguments(node_in&node_out) must be in the graph nodes.'
    node_in.outputs_append(node_out)
    node_out.inputs_append(node_in)
def safe_remove_nodes(self, remove_nodes):
    """
    Remove nodes from the graph together with the links attached to them.

    Args:
        remove_nodes(set): the nodes to remove. Any other iterable is
            converted to a set, and a single non-iterable value is wrapped
            in a one-element set.
    """
    if isinstance(remove_nodes, set):
        targets = remove_nodes
    elif isinstance(remove_nodes, Iterable):
        targets = set(remove_nodes)
    else:
        targets = {remove_nodes}
    core.graph_safe_remove_nodes(self.graph, targets)
def
draw
(
self
,
save_path
,
name
,
marked_nodes
=
None
):
def has_circle(self):
    """
    Ask the core library whether the graph contains a circle.

    Returns:
        bool: True if the graph has a circle, else False.
    """
    circle_found = core.has_circle(self.graph)
    return circle_found
def graph_num(self):
    """
    Count the unconnected sub-graphs contained in this graph.

    Returns:
        int: the number of unconnected graphs, as computed by
        `core.graph_num`.
    """
    count = core.graph_num(self.graph)
    return count
def topology_sort(self):
    """
    Run a topology sort over the graph.

    Notes: the `graph` cannot contain a circle.

    Returns:
        The result of `core.topology_sort`: the nodes in topology order.
    """
    ordered_nodes = core.topology_sort(self.graph)
    return ordered_nodes
def build_adjacency_list(self):
    """
    Build the adjacency list of operations for the `graph`.

    Returns:
        dict{core.Node: set(core.Node)}: the adjacency list produced by
        `core.build_adjacency_list`.
    """
    adjacency = core.build_adjacency_list(self.graph)
    return adjacency
def
draw
(
self
,
save_path
,
name
,
marked_nodes
=
None
,
remove_ctr_var
=
True
):
"""
Draw the graph. If `dot` command is installed, the drawn graph
will be saved as pdf file type, otherwise dot file type is used.
Args:
save_path(str): the save path of drawn graph.
name(str): the name of drawn graph.
marked_nodes(set(core.Node)): nodes that are needed to be marked.
Default value is None.
remove_ctr_var(bool): If it is set True, all control variable nodes
in the graph will be removed. Default value is True.
"""
def
_convert_to_pdf
(
dot_file_path
):
pdf_save_path
=
os
.
path
.
splitext
(
dot_file_path
)[
0
]
+
'.pdf'
exited_code
=
subprocess
.
call
(
'dot -Tpdf '
+
dot_file_path
\
...
...
@@ -1629,15 +1816,17 @@ class IrGraph(object):
print
(
'The {} is saved as the dot filetype.'
.
format
(
dot_file_path
))
if
remove_ctr_var
:
remove_ctr_vars
=
set
()
ops_num
=
0
for
node
in
self
.
graph
.
nodes
():
if
node
.
is_ctrl_var
():
remove_ctr_vars
.
add
(
node
)
elif
node
.
is_op
():
self
.
safe_remove_nodes
(
remove_ctr_vars
)
ops_num
=
0
for
node
in
self
.
graph
.
nodes
():
if
node
.
is_op
():
ops_num
+=
1
print
(
'Total ops num = {}.'
.
format
(
ops_num
))
self
.
safe_remove_nodes
(
remove_ctr_vars
)
if
marked_nodes
is
not
None
:
if
not
isinstance
(
marked_nodes
,
set
):
marked_nodes
=
set
(
marked_nodes
)
...
...
@@ -1652,10 +1841,20 @@ class IrGraph(object):
_convert_to_pdf
(
viz_dot_path
)
def to_program(self):
    """
    Convert the graph into a Program.

    Notes: When the graph includes backward operator nodes, the
    conversion process may fail. Usually, this function is only used
    to convert a test graph.

    Returns:
        Program: a program converted from the graph.
    """
    convert_pass = core.get_pass('graph_to_program_pass')
    # NOTE(review): 'program' is attached twice, first with `set` and then
    # with `set_not_owned` -- this looks like both sides of a diff; confirm
    # which call is intended.
    convert_pass.set('program', Program().desc)
    desc = core.ProgramDesc()
    convert_pass.set_not_owned('program', desc)
    convert_pass.apply(self.graph)
    # NOTE(review): `desc` is rebound here, discarding the ProgramDesc
    # created above -- confirm whether this read-back is still needed.
    desc = convert_pass.get_program('program')
    program = Program._construct_from_desc(desc)
    return program
...
...
python/paddle/fluid/imperative/layers.py
浏览文件 @
5998d3cc
...
...
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
collections
import
contextlib
import
sys
import
numpy
as
np
...
...
@@ -30,31 +31,45 @@ class Layer(core.Layer):
def __init__(self, dtype=core.VarDesc.VarType.FP32, name=None):
    """
    Set up the bookkeeping state of a Layer.

    Args:
        dtype: stored as the layer's data type; defaults to FP32.
        name: accepted but not used by this constructor.
    """
    # Ordered registries for sub-layers and parameters.
    self._sub_layers = collections.OrderedDict()
    self._parameters = collections.OrderedDict()
    self._dtype = dtype
    self._built = False
def parameters(self, include_sublayers=True):
    """Collects the Parameters of this layer and, optionally, its sub-layers.

    Args:
        include_sublayers: If true, also include the parameters from
        sublayers.

    Returns a list of Parameters: this layer's own first, then those of each
    sub-layer in registration order.
    """
    collected = list(self._parameters.values())
    if include_sublayers:
        for child in self._sub_layers.values():
            collected.extend(child.parameters(include_sublayers))
    return collected
def
parameters
(
self
):
params
=
[]
for
key
in
self
.
__dict__
.
keys
():
value
=
self
.
__dict__
[
key
]
if
isinstance
(
value
,
framework
.
Parameter
):
params
.
append
(
value
)
elif
isinstance
(
value
,
core
.
Layer
):
params
.
extend
(
value
.
parameters
())
elif
isinstance
(
value
,
collections
.
Container
):
if
len
(
value
)
==
0
:
continue
if
isinstance
(
value
[
0
],
framework
.
Parameter
):
params
.
extend
(
value
)
elif
isinstance
(
value
[
0
],
core
.
Layer
):
for
v
in
value
:
params
.
extend
(
v
.
parameters
())
return
params
def sublayers(self, include_sublayers=True):
    """Collects the sub layers of this layer.

    Args:
        include_sublayers: If true, also include the layers from sublayers.

    Returns a list of sub layers: the direct ones first, then the nested
    ones of each direct sub layer in registration order.
    """
    collected = list(self._sub_layers.values())
    if include_sublayers:
        for child in self._sub_layers.values():
            collected.extend(child.sublayers(include_sublayers))
    return collected
def clear_gradients(self):
    """Clears the gradient of every parameter returned by `parameters()`."""
    for param in self.parameters():
        param._clear_gradient()
def
_build_once
(
self
,
input
s
):
def
_build_once
(
self
,
*
arg
s
):
pass
def
__call__
(
self
,
*
inputs
):
...
...
@@ -71,6 +86,66 @@ class Layer(core.Layer):
def backward(self, *inputs):
    """Always raises ValueError: a Layer does not provide its own backward."""
    message = "Layer shouldn't implement backward"
    raise ValueError(message)
def add_sublayer(self, name, sublayer):
    """Registers a sub Layer instance under `name`.

    The registered sublayer can afterwards be accessed like self.name.

    Args:
        name: name of this sublayer.
        sublayer: an instance of Layer.
    Returns:
        the sublayer passed in.
    """
    assert isinstance(sublayer, core.Layer)
    registry = self._sub_layers
    registry[name] = sublayer
    return sublayer
def add_parameter(self, name, parameter):
    """Registers a Parameter instance under `name`.

    The registered parameter can afterwards be accessed like self.name.

    Args:
        name: name of this parameter.
        parameter: an instance of Parameter.
    Returns:
        the parameter passed in.
    """
    assert isinstance(parameter, framework.Parameter)
    registry = self._parameters
    registry[name] = parameter
    return parameter
def __getattr__(self, name):
    """Resolves attributes that normal lookup did not find.

    Registered parameters are searched first, then registered sub layers.

    Args:
        name: the attribute name being looked up.
    Returns:
        the Parameter or sub Layer registered under `name`.
    Raises:
        AttributeError: if `name` is neither a registered parameter nor a
            registered sub layer.
    """
    # Read the registries through __dict__: touching self._parameters before
    # __init__ has created it would re-enter __getattr__ and recurse forever.
    params = self.__dict__.get('_parameters')
    if params is not None and name in params:
        return params[name]
    layers = self.__dict__.get('_sub_layers')
    if layers is not None and name in layers:
        return layers[name]
    # Falling through used to return None implicitly, which made hasattr()
    # report True for every name and hid typos in attribute access.
    raise AttributeError("'%s' object has no attribute '%s'" %
                         (type(self).__name__, name))
def __setattr__(self, name, value):
    """Routes attribute assignment by the type of `value`.

    Parameters go into the parameter registry and Layers into the sub-layer
    registry; everything else is stored as a normal attribute.

    Raises:
        ValueError: if a Parameter or Layer is assigned before the matching
            registry exists on the instance.
    """
    if isinstance(value, framework.Parameter):
        registry = self.__dict__.get('_parameters', None)
        if registry is None:
            raise ValueError(
                "super(YourLayer, self).__init__() should be called first")
        registry[name] = value
        return
    if isinstance(value, core.Layer):
        registry = self.__dict__.get('_sub_layers', None)
        if registry is None:
            raise ValueError(
                "super(YourLayer, self).__init__() should be called first")
        registry[name] = value
        return
    object.__setattr__(self, name, value)
def __delattr__(self, name):
    """Deletes `name` from wherever it is stored.

    The parameter registry is checked first, then the sub-layer registry;
    any other name is deleted as a normal attribute.
    """
    if name in self._parameters:
        del self._parameters[name]
        return
    if name in self._sub_layers:
        del self._sub_layers[name]
        return
    object.__delattr__(self, name)
class
PyLayer
(
core
.
PyLayer
):
"""Layers composed of user-defined python codes."""
...
...
python/paddle/fluid/imperative/nn.py
浏览文件 @
5998d3cc
...
...
@@ -225,9 +225,6 @@ class FC(layers.Layer):
act
=
act
,
name
=
name
)
def
parameters
(
self
):
return
[
self
.
_w
,
self
.
_b
]
def
_build_once
(
self
,
input
):
input_shape
=
input
.
shape
param_shape
=
[
...
...
@@ -478,9 +475,6 @@ class Embedding(layers.Layer):
dtype
=
self
.
_dtype
,
is_bias
=
False
)
def
parameters
(
self
):
return
[
self
.
_w
]
def
forward
(
self
,
input
):
out
=
self
.
_helper
.
create_variable_for_type_inference
(
self
.
_dtype
)
self
.
_helper
.
append_op
(
...
...
python/paddle/fluid/layers/control_flow.py
浏览文件 @
5998d3cc
...
...
@@ -506,9 +506,9 @@ class While(object):
while loop control flow.
Args:
cond
(Variable): condition used to compare.
cond(Variable): condition used to compare.
is_test(bool): A flag indicating whether execution is in test phase.
name
(str): The name of this layer.
name(str): The name of this layer.
Examples:
.. code-block:: python
...
...
@@ -589,7 +589,8 @@ class While(object):
def
lod_rank_table
(
x
,
level
=
0
):
"""LoD Rank Table Operator. Given an input variable **x** and a level number
"""
LoD Rank Table Operator. Given an input variable **x** and a level number
of LoD, this layer creates a LodRankTable object. A LoDRankTable object
contains a list of bi-element tuples. Each tuple consists of an index and
a length, both of which are int type. Refering to specified level of LoD,
...
...
@@ -883,10 +884,8 @@ def less_than(x, y, force_cpu=None, cond=None, **ignored):
return
cond
def
equal
(
x
,
y
,
cond
=
None
,
**
ignored
):
def
equal
(
x
,
y
,
cond
=
None
):
"""
**equal**
This layer returns the truth value of :math:`x == y` elementwise.
Args:
...
...
@@ -1458,7 +1457,6 @@ class DynamicRNN(object):
Returns:
The current timestep in the input sequence.
"""
self
.
_assert_in_rnn_block_
(
"step_input"
)
if
not
isinstance
(
x
,
Variable
):
...
...
@@ -1535,8 +1533,7 @@ class DynamicRNN(object):
@
signature_safe_contextmanager
def
block
(
self
):
"""
The block for user to define operators in RNN. See the class docstring
for more details.
The block for user to define operators in RNN.
"""
if
self
.
status
!=
DynamicRNN
.
BEFORE_RNN
:
raise
ValueError
(
"rnn.block() can only be invoke once"
)
...
...
@@ -1640,8 +1637,7 @@ class DynamicRNN(object):
dtype(str|numpy.dtype): The data type of the initialized memory.
Returns:
the memory variable.
The memory variable.
"""
self
.
_assert_in_rnn_block_
(
'memory'
)
self
.
_init_zero_idx_
()
...
...
@@ -1740,7 +1736,7 @@ class DynamicRNN(object):
def
output
(
self
,
*
outputs
):
"""
m
ark the RNN output variables.
M
ark the RNN output variables.
Args:
outputs: The output variables.
...
...
python/paddle/fluid/layers/io.py
浏览文件 @
5998d3cc
...
...
@@ -56,7 +56,10 @@ def data(name,
Args:
name(str): The name/alias of the function
shape(list): Tuple declaring the shape.
shape(list): Tuple declaring the shape. If :code:`append_batch_size` is
True and there is no -1 inside :code:`shape`, it should be
considered as the shape of the each sample. Otherwise, it
should be considered as the shape of the batched data.
append_batch_size(bool):
1. If true, it prepends -1 to the shape.
For example if shape=[1], the resulting shape is [-1, 1].
...
...
python/paddle/fluid/layers/layer_function_generator.py
浏览文件 @
5998d3cc
...
...
@@ -24,7 +24,7 @@ from ..framework import OpProtoHolder, Variable, core, convert_np_dtype_to_dtype
from
..layer_helper
import
LayerHelper
__all__
=
[
'deprecated'
,
'generate_layer_fn'
,
'generate_
layer_fn_noattr
'
,
'autodoc'
,
'deprecated'
,
'generate_layer_fn'
,
'generate_
activation_fn
'
,
'autodoc'
,
'templatedoc'
]
...
...
@@ -89,6 +89,9 @@ def _generate_doc_string_(op_proto, additional_args_lines=None):
buf
.
write
(
'
\n
'
)
skip_attrs
=
OpProtoHolder
.
generated_op_attr_names
()
# attr use_mkldnn and is_test also should not be visible to users.
skip_attrs
.
add
(
"use_mkldnn"
)
skip_attrs
.
add
(
"is_test"
)
for
each_attr
in
op_proto
.
attrs
:
if
each_attr
.
name
in
skip_attrs
:
...
...
@@ -226,7 +229,7 @@ def generate_layer_fn(op_type):
return
func
def
generate_
layer_fn_noattr
(
op_type
):
def
generate_
activation_fn
(
op_type
):
"""Register the Python layer for an Operator without Attribute.
Args:
...
...
@@ -246,6 +249,7 @@ def generate_layer_fn_noattr(op_type):
func
.
__name__
=
op_type
func
.
__doc__
=
_generate_doc_string_
(
op_proto
)
return
func
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
5998d3cc
...
...
@@ -3236,7 +3236,7 @@ def group_norm(input,
# create output
mean_out
=
helper
.
create_variable
(
dtype
=
dtype
,
stop_gradient
=
True
)
variance_out
=
helper
.
create_variable
(
dtype
=
dtype
,
stop_gradient
=
True
)
group_norm_out
=
helper
.
create_variable
(
dtype
)
group_norm_out
=
helper
.
create_variable
(
dtype
=
dtype
)
helper
.
append_op
(
type
=
"group_norm"
,
...
...
@@ -5936,13 +5936,10 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None):
than :attr:`shape`.
act (str): The non-linear activation to be applied to the reshaped tensor
variable.
inplace(bool): Must use :attr:`False` if :attr:`x` is used in multiple
operators. If this flag is set :attr:`True`, reuse input
:attr:`x` to reshape, which will change the shape of
tensor variable :attr:`x` and might cause errors when
:attr:`x` is used in multiple operators. If :attr:`False`,
preserve the shape :attr:`x` and create a new output tensor
variable whose data is copied from input x but reshaped.
inplace(bool): If ``inplace`` is `True`, the input and output of ``layers.reshape``
are the same variable, otherwise, the input and output of
``layers.reshape`` are different variables. Note that if :attr:`x`
is more than one layer's input, ``inplace`` must be :attr:`False`.
name (str): The name of this layer. It is optional.
Returns:
...
...
@@ -8335,6 +8332,46 @@ def stack(x, axis=0):
If :code:`axis` < 0, it would be replaced with :code:`axis+rank(x[0])+1`.
If :code:`axis` is None, it would be replaced with 0.
For Example:
.. code-block:: text
Case 1:
Input:
x[0].data = [ [1.0 , 2.0 ] ]
x[0].dims = [1, 2]
x[1].data = [ [3.0 , 4.0 ] ]
x[1].dims = [1, 2]
x[2].data = [ [5.0 , 6.0 ] ]
x[2].dims = [1, 2]
Attrs:
axis = 0
Output:
Out.data =[ [ [1.0, 2.0] ],
[ [3.0, 4.0] ],
[ [5.0, 6.0] ] ]
Out.dims = [3, 1, 2]
Case 2:
Given
x[0].data = [ [1.0 , 2.0 ] ]
x[0].dims = [1, 2]
x[1].data = [ [3.0 , 4.0 ] ]
x[1].dims = [1, 2]
x[2].data = [ [5.0 , 6.0 ] ]
x[2].dims = [1, 2]
Attrs:
axis = 1 or axis = -2
Output:
Out.data =[ [ [1.0, 2.0]
[3.0, 4.0]
[5.0, 6.0] ] ]
Out.dims = [1, 3, 2]
Args:
x (Variable|list(Variable)|tuple(Variable)): Input variables.
axis (int|None): The axis along which all inputs are stacked.
...
...
python/paddle/fluid/layers/ops.py
浏览文件 @
5998d3cc
...
...
@@ -14,7 +14,7 @@
from
__future__
import
print_function
import
os
from
.layer_function_generator
import
generate_layer_fn
,
generate_
layer_fn_noattr
from
.layer_function_generator
import
generate_layer_fn
,
generate_
activation_fn
from
..
import
core
from
..framework
import
convert_np_dtype_to_dtype_
...
...
@@ -53,7 +53,7 @@ globals()['_elementwise_div'] = generate_layer_fn('elementwise_div')
__all__
+=
__activations_noattr__
for
_OP
in
set
(
__activations_noattr__
):
globals
()[
_OP
]
=
generate_
layer_fn_noattr
(
_OP
)
globals
()[
_OP
]
=
generate_
activation_fn
(
_OP
)
__all__
+=
[
"uniform_random"
]
...
...
python/paddle/fluid/layers/tensor.py
浏览文件 @
5998d3cc
...
...
@@ -567,7 +567,7 @@ def ones(shape, dtype, force_cpu=False):
It also sets *stop_gradient* to True.
Args:
shape(tuple|list
|None
): Shape of output tensor
shape(tuple|list): Shape of output tensor
dtype(np.dtype|core.VarDesc.VarType|str): Data type of output tensor
Returns:
...
...
@@ -578,6 +578,10 @@ def ones(shape, dtype, force_cpu=False):
data = fluid.layers.ones(shape=[1], dtype='int64')
"""
assert
isinstance
(
shape
,
list
)
or
isinstance
(
shape
,
tuple
),
"The shape's type should be list or tuple."
assert
reduce
(
lambda
x
,
y
:
x
*
y
,
shape
)
>
0
,
"The shape is invalid: %s."
%
(
str
(
shape
))
return
fill_constant
(
value
=
1.0
,
**
locals
())
...
...
python/paddle/fluid/optimizer.py
浏览文件 @
5998d3cc
...
...
@@ -1368,9 +1368,9 @@ class FtrlOptimizer(Optimizer):
Args:
learning_rate (float|Variable): global learning rate.
l1 (float):
l2 (float):
lr_power (float):
l1 (float):
L1 regularization strength.
l2 (float):
L2 regularization strength.
lr_power (float):
Learning Rate Power.
regularization: A Regularizer, such as
fluid.regularizer.L2DecayRegularizer.
name: An optional name prefix.
...
...
python/paddle/fluid/parallel_executor.py
浏览文件 @
5998d3cc
...
...
@@ -148,6 +148,8 @@ class ParallelExecutor(object):
else
framework
.
default_main_program
()
# FIXME(dzhwinter): enable_inplace should be after memory_optimize
# if turn on python memory optimize, turn off the inplace_pass.
if
build_strategy
.
memory_optimize
is
None
:
build_strategy
.
memory_optimize
=
False
if
main
.
_is_mem_optimized
else
True
if
build_strategy
.
enable_inplace
is
None
:
build_strategy
.
enable_inplace
=
False
if
main
.
_is_mem_optimized
else
True
scope
=
scope
if
scope
is
not
None
else
executor
.
global_scope
()
...
...
python/paddle/fluid/tests/unittests/CMakeLists.txt
浏览文件 @
5998d3cc
...
...
@@ -77,6 +77,7 @@ list(REMOVE_ITEM TEST_OPS test_bilinear_interp_op)
list
(
REMOVE_ITEM TEST_OPS test_nearest_interp_op
)
list
(
REMOVE_ITEM TEST_OPS test_imperative_resnet
)
list
(
REMOVE_ITEM TEST_OPS test_imperative_optimizer
)
list
(
REMOVE_ITEM TEST_OPS test_ir_memory_optimize_transformer
)
foreach
(
TEST_OP
${
TEST_OPS
}
)
py_test_modules
(
${
TEST_OP
}
MODULES
${
TEST_OP
}
)
endforeach
(
TEST_OP
)
...
...
@@ -107,6 +108,9 @@ py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SE
py_test_modules
(
test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL
)
set_tests_properties
(
test_parallel_executor_fetch_feed PROPERTIES TIMEOUT 450
)
py_test_modules
(
test_parallel_executor_transformer MODULES test_parallel_executor_transformer SERIAL
)
if
(
NOT WIN32
)
py_test_modules
(
test_ir_memory_optimize_transformer MODULES test_ir_memory_optimize_transformer SERIAL
)
endif
()
if
(
NOT APPLE
)
py_test_modules
(
test_image_classification_resnet MODULES test_image_classification_resnet SERIAL
)
if
(
CMAKE_BUILD_TYPE STREQUAL
"Debug"
)
...
...
python/paddle/fluid/tests/unittests/ngraph/test_accuracy_ngraph_op.py
浏览文件 @
5998d3cc
...
...
@@ -15,39 +15,7 @@
from
__future__
import
print_function
import
unittest
import
numpy
as
np
from
paddle.fluid.tests.unittests.op_test
import
OpTest
class
TestNGRAPHAccuracyOp
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"accuracy"
self
.
dtype
=
np
.
float32
self
.
init_dtype
()
n
=
128
infer
=
np
.
random
.
random
((
n
,
1
)).
astype
(
self
.
dtype
)
indices
=
np
.
random
.
randint
(
0
,
2
,
(
n
,
1
))
label
=
np
.
random
.
randint
(
0
,
2
,
(
n
,
1
))
self
.
inputs
=
{
'Out'
:
infer
,
'Indices'
:
indices
,
"Label"
:
label
}
num_correct
=
0
for
rowid
in
range
(
n
):
for
ele
in
indices
[
rowid
]:
if
ele
==
label
[
rowid
]:
num_correct
+=
1
break
self
.
outputs
=
{
'Accuracy'
:
np
.
array
([
num_correct
/
float
(
n
)]).
astype
(
self
.
dtype
),
'Correct'
:
np
.
array
([
num_correct
]).
astype
(
"int64"
),
'Total'
:
np
.
array
([
n
]).
astype
(
"int64"
)
}
self
.
_cpu_only
=
True
def
init_dtype
(
self
):
pass
def
test_check_output
(
self
):
self
.
check_output
()
from
paddle.fluid.tests.unittests.test_accuracy_op
import
TestAccuracyOp
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/ngraph/test_batch_norm_ngraph_op.py
浏览文件 @
5998d3cc
...
...
@@ -17,21 +17,5 @@ from __future__ import print_function
import
unittest
from
paddle.fluid.tests.unittests.test_batch_norm_op
import
TestBatchNormOpTraining
,
TestBatchNormOpInference
class
TestNGRAPHBatchNormOpTraining
(
TestBatchNormOpTraining
):
def
init_kernel_type
(
self
):
super
(
TestNGRAPHBatchNormOpTraining
,
self
).
init_kernel_type
()
class
TestNGRAPHBatchNormOpInference
(
TestBatchNormOpInference
):
def
init_kernel_type
(
self
):
super
(
TestNGRAPHBatchNormOpInference
,
self
).
init_kernel_type
()
class
TestNGRAPHBatchNormOpWithReluInference
(
TestBatchNormOpInference
):
def
init_kernel_type
(
self
):
super
(
TestNGRAPHBatchNormOpWithReluInference
,
self
).
init_kernel_type
()
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/ngraph/test_conv2d_ngraph_op.py
浏览文件 @
5998d3cc
...
...
@@ -17,60 +17,5 @@ from __future__ import print_function
import
unittest
from
paddle.fluid.tests.unittests.test_conv2d_op
import
TestConv2dOp
,
TestWithPad
,
TestWithStride
,
TestWithGroup
,
TestWith1x1
,
TestWithInput1x1Filter1x1
class
TestNGRAPH
(
TestConv2dOp
):
def
setUp
(
self
):
super
(
TestNGRAPH
,
self
).
setUp
()
self
.
_cpu_only
=
True
def
init_kernel_type
(
self
):
super
(
TestNGRAPH
,
self
).
init_kernel_type
()
class
TestNGRAPHWithPad
(
TestWithPad
):
def
setUp
(
self
):
super
(
TestNGRAPHWithPad
,
self
).
setUp
()
self
.
_cpu_only
=
True
def
init_kernel_type
(
self
):
super
(
TestNGRAPHWithPad
,
self
).
init_kernel_type
()
class
TestNGRAPHWithStride
(
TestWithStride
):
def
setUp
(
self
):
super
(
TestNGRAPHWithStride
,
self
).
setUp
()
self
.
_cpu_only
=
True
def
init_kernel_type
(
self
):
super
(
TestNGRAPHWithStride
,
self
).
init_kernel_type
()
class
TestNGRAPHWithGroup
(
TestWithGroup
):
def
setUp
(
self
):
super
(
TestNGRAPHWithGroup
,
self
).
setUp
()
self
.
_cpu_only
=
True
def
init_kernel_type
(
self
):
super
(
TestNGRAPHWithGroup
,
self
).
init_kernel_type
()
class
TestNGRAPHWith1x1
(
TestWith1x1
):
def
setUp
(
self
):
super
(
TestNGRAPHWith1x1
,
self
).
setUp
()
self
.
_cpu_only
=
True
def
init_kernel_type
(
self
):
super
(
TestNGRAPHWith1x1
,
self
).
init_kernel_type
()
class
TestNGRAPHWithInput1x1Filter1x1
(
TestWithInput1x1Filter1x1
):
def
setUp
(
self
):
super
(
TestNGRAPHWithInput1x1Filter1x1
,
self
).
setUp
()
self
.
_cpu_only
=
True
def
init_kernel_type
(
self
):
super
(
TestNGRAPHWithInput1x1Filter1x1
,
self
).
init_kernel_type
()
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/ngraph/test_cross_entropy_ngraph_op.py
0 → 100644
浏览文件 @
5998d3cc
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
import
paddle.fluid.core
as
core
from
paddle.fluid.tests.unittests.op_test
import
OpTest
,
randomize_probability
class TestCrossEntropyOp(OpTest):
    """Cross-entropy operator test using hard (integer index) labels.

    Subclasses customise a case by overriding the ``init_*`` hooks and
    ``get_cross_entropy``; ``setUp`` invokes them in a fixed order.
    """

    def setUp(self):
        # Defaults; any of these may be overwritten by the hooks below.
        self.op_type = "cross_entropy"
        self.soft_label = False
        self.ignore_index = -100
        self.dtype = np.float64
        self.batch_size = 30
        self.class_num = 10
        self._cpu_only = True

        # The hooks run in this order: dtype, attributes and sizes first,
        # then the input data, then the reference output.
        for hook_name in ("init_dtype_type", "init_attr_type",
                          "init_bs_class_num", "init_x", "init_label",
                          "get_cross_entropy"):
            getattr(self, hook_name)()

        self.inputs = dict(X=self.x, Label=self.label)
        self.outputs = dict(Y=self.cross_entropy)
        self.attrs = dict(
            soft_label=self.soft_label, ignore_index=self.ignore_index)

    def init_x(self):
        """Generate the input ``X`` via ``randomize_probability``."""
        rows, cols = self.batch_size, self.class_num
        self.x = randomize_probability(rows, cols, dtype=self.dtype)

    def init_label(self):
        """Draw one random int64 class index per sample, shape (batch_size, 1)."""
        label_shape = (self.batch_size, 1)
        self.label = np.random.randint(
            0, self.class_num, label_shape, dtype="int64")

    def get_cross_entropy(self):
        """Compute the reference loss ``-log(x[i][label[i]])`` per row."""
        losses = []
        for row in range(self.x.shape[0]):
            target = self.label[row][0]
            losses.append([-np.log(self.x[row][target])])
        self.cross_entropy = np.asmatrix(losses, dtype="float64")

    def init_attr_type(self):
        """Hook for overriding operator attributes; no-op in the base case."""
        pass

    def init_dtype_type(self):
        """Hook for overriding ``self.dtype``; no-op in the base case."""
        pass

    def init_bs_class_num(self):
        """Hook for overriding batch size / class count; no-op in the base case."""
        pass

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
        grad_inputs = ["X"]
        self.check_grad(grad_inputs, "Y", numeric_grad_delta=0.001)
class TestCrossEntropyOp2(TestCrossEntropyOp):
    """Cross-entropy operator test using dense soft labels."""

    def init_label(self):
        """Build a soft-label matrix whose rows are normalised to sum to 1."""
        soft = np.random.uniform(
            0.1, 1.0, [self.batch_size, self.class_num]).astype(self.dtype)
        # In-place division keeps the array in ``self.dtype``.
        soft /= soft.sum(axis=1, keepdims=True)
        self.label = soft

    def get_cross_entropy(self):
        """Reference loss: ``sum_j(-label[i][j] * log(x[i][j]))`` per row."""
        weighted = -self.label * np.log(self.x)
        per_row = weighted.sum(axis=1, keepdims=True)
        self.cross_entropy = per_row.astype(self.dtype)

    def init_attr_type(self):
        self.soft_label = True

    def init_dtype_type(self):
        self.dtype = np.float32

    def init_bs_class_num(self):
        self.batch_size, self.class_num = 5, 37

    def test_check_grad(self):
        grad_inputs = ["X"]
        self.check_grad(
            grad_inputs,
            "Y",
            max_relative_error=0.05,
            numeric_grad_delta=0.001)
class TestCrossEntropyOp3(TestCrossEntropyOp):
    """Cross-entropy operator test using one-hot vectors fed as soft labels."""

    def init_label(self):
        """Pick a random class per sample and expand it to a one-hot row."""
        self.label_index = np.random.randint(0, self.class_num,
                                             self.batch_size)
        one_hot = np.zeros(self.x.shape).astype(self.dtype)
        sample_ids = np.arange(self.batch_size)
        one_hot[sample_ids, self.label_index] = 1
        self.label = one_hot

    def get_cross_entropy(self):
        """Reference loss: ``-log(x[i][label_index[i]])`` per row."""
        losses = []
        for row in range(self.x.shape[0]):
            target = self.label_index[row]
            losses.append([-np.log(self.x[row][target])])
        self.cross_entropy = np.asmatrix(losses).astype(self.dtype)

    def init_attr_type(self):
        self.soft_label = True

    def init_dtype_type(self):
        self.dtype = np.float32

    def init_bs_class_num(self):
        self.batch_size, self.class_num = 5, 17

    def test_check_grad(self):
        grad_inputs = ["X"]
        self.check_grad(
            grad_inputs,
            "Y",
            max_relative_error=0.05,
            numeric_grad_delta=0.001)
class TestCrossEntropyOp4(TestCrossEntropyOp):
    """Cross-entropy test on a high-rank input with hard (index) labels."""

    def init_x(self):
        """Generate a 2-D probability matrix and reshape it to ``shape + [class_num]``."""
        self.shape = [10, 2, 4]
        self.ins_num = np.prod(np.array(self.shape))
        flat = randomize_probability(self.ins_num, self.class_num)
        self.X_2d = flat.astype(self.dtype)
        self.x = self.X_2d.reshape(self.shape + [self.class_num])

    def init_label(self):
        """Draw int64 class indices in 2-D form and reshape to ``shape + [1]``."""
        flat_shape = (self.ins_num, 1)
        self.label_2d = np.random.randint(
            0, self.class_num, flat_shape, dtype="int64")
        self.label = self.label_2d.reshape(self.shape + [1])

    def get_cross_entropy(self):
        """Compute the loss on the 2-D view, then reshape to ``shape + [1]``."""
        losses = []
        for row in range(self.X_2d.shape[0]):
            target = self.label_2d[row][0]
            losses.append([-np.log(self.X_2d[row][target])])
        cross_entropy_2d = np.asmatrix(losses).astype(self.dtype)

        # Convert the matrix to an ndarray before reshaping to the input rank.
        self.cross_entropy = np.array(cross_entropy_2d).reshape(
            self.shape + [1])

    def init_attr_type(self):
        self.soft_label = False

    def init_dtype_type(self):
        self.dtype = np.float64

    def init_bs_class_num(self):
        self.class_num = 10
class TestCrossEntropyOp5(TestCrossEntropyOp):
    """Cross-entropy test on a high-rank input with dense soft labels."""

    def init_x(self):
        """Generate a 2-D probability matrix and reshape it to ``shape + [class_num]``."""
        self.shape = [4, 3]
        self.ins_num = np.prod(np.array(self.shape))
        flat = randomize_probability(self.ins_num, self.class_num)
        self.X_2d = flat.astype(self.dtype)
        self.x = self.X_2d.reshape(self.shape + [self.class_num])

    def init_label(self):
        """Build row-normalised soft labels in 2-D, then reshape like ``x``."""
        soft = np.random.uniform(
            0.1, 1.0, [self.ins_num, self.class_num]).astype(self.dtype)
        # In-place division keeps the array in ``self.dtype``.
        soft /= soft.sum(axis=1, keepdims=True)
        self.label_2d = soft
        self.label = self.label_2d.reshape(self.shape + [self.class_num])

    def get_cross_entropy(self):
        """Compute the soft-label loss in 2-D, then reshape to ``shape + [1]``."""
        weighted = -self.label_2d * np.log(self.X_2d)
        cross_entropy_2d = weighted.sum(
            axis=1, keepdims=True).astype(self.dtype)
        self.cross_entropy = np.array(cross_entropy_2d).reshape(
            self.shape + [1])

    def init_attr_type(self):
        self.soft_label = True

    def init_dtype_type(self):
        self.dtype = np.float32

    def init_bs_class_num(self):
        self.class_num = 37

    def test_check_grad(self):
        grad_inputs = ["X"]
        self.check_grad(
            grad_inputs,
            "Y",
            max_relative_error=0.05,
            numeric_grad_delta=0.001)
class TestCrossEntropyOp6(TestCrossEntropyOp):
    """Cross-entropy test on a high-rank input with one-hot soft labels."""

    def init_x(self):
        """Generate a 2-D probability matrix and reshape it to ``shape + [class_num]``."""
        self.shape = [4, 3, 2]
        self.ins_num = np.prod(np.array(self.shape))
        flat = randomize_probability(self.ins_num, self.class_num)
        self.X_2d = flat.astype(self.dtype)
        self.x = self.X_2d.reshape(self.shape + [self.class_num])

    def init_label(self):
        """Pick a random class per instance and expand it to one-hot rows."""
        self.label_index_2d = np.random.randint(
            0, self.class_num, self.ins_num, dtype="int64")
        one_hot = np.zeros(self.X_2d.shape)
        instance_ids = np.arange(self.ins_num)
        one_hot[instance_ids, self.label_index_2d] = 1
        reshaped = one_hot.reshape(self.shape + [self.class_num])
        self.label = reshaped.astype(self.dtype)

    def get_cross_entropy(self):
        """Compute ``-log(X_2d[i][index[i]])`` per row, then reshape to ``shape + [1]``."""
        losses = []
        for row in range(self.X_2d.shape[0]):
            target = self.label_index_2d[row]
            losses.append([-np.log(self.X_2d[row][target])])
        cross_entropy_2d = np.asmatrix(losses)
        # The dtype cast is applied after the reshape.
        reshaped = np.array(cross_entropy_2d).reshape(self.shape + [1])
        self.cross_entropy = reshaped.astype(self.dtype)

    def init_attr_type(self):
        self.soft_label = True

    def init_dtype_type(self):
        self.dtype = np.float32

    def init_bs_class_num(self):
        self.class_num = 17

    def test_check_grad(self):
        grad_inputs = ["X"]
        self.check_grad(
            grad_inputs,
            "Y",
            max_relative_error=0.05,
            numeric_grad_delta=0.001)
class TestCrossEntropyOp7(TestCrossEntropyOp):
    """Cross-entropy operator test with ``ignore_index`` set to a valid class id."""

    def init_label(self):
        """Draw one random int64 class index per sample, shape (batch_size, 1)."""
        label_shape = (self.batch_size, 1)
        self.label = np.random.randint(
            0, self.class_num, label_shape, dtype="int64")

    def get_cross_entropy(self):
        """Reference loss per row; rows labelled ``ignore_index`` contribute 0."""
        losses = []
        for row in range(self.x.shape[0]):
            target = self.label[row][0]
            if target != self.ignore_index:
                losses.append([-np.log(self.x[row][target])])
            else:
                losses.append([0])
        self.cross_entropy = np.asmatrix(losses).astype(self.dtype)

    def init_attr_type(self):
        self.soft_label = False
        self.ignore_index = 3

    def init_dtype_type(self):
        self.dtype = np.float64

    def init_bs_class_num(self):
        self.batch_size, self.class_num = 30, 10
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/ngraph/test_elementwise_add_ngraph_op.py
浏览文件 @
5998d3cc
...
...
@@ -13,18 +13,9 @@
# limitations under the License.
from
__future__
import
print_function
import
unittest
from
paddle.fluid.tests.unittests.test_elementwise_add_op
import
TestElementwiseAddOp
class
TestNGRAPHElementwiseAddOp
(
TestElementwiseAddOp
):
def
setUp
(
self
):
super
(
TestNGRAPHElementwiseAddOp
,
self
).
setUp
()
self
.
_cpu_only
=
True
def
init_input_output
(
self
):
super
(
TestNGRAPHElementwiseAddOp
,
self
).
init_input_output
()
import
unittest
from
paddle.fluid.tests.unittests.test_elementwise_add_op
import
TestElementwiseAddOp
,
TestElementwiseAddOp_broadcast_0
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/ngraph/test_fill_constant_ngraph_op.py
浏览文件 @
5998d3cc
...
...
@@ -13,24 +13,34 @@
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
from
paddle.fluid.tests.unittests.test_fill_constant_op
import
TestFillConstantOp1
,
TestFillConstantOp2
,
TestFillConstantOpWithSelectedRows
class
TestNGRAPHFillConstant
Op1
(
TestFillConstantOp1
):
class
TestNGRAPHFillConstant
FP64
(
TestFillConstantOp1
):
def
setUp
(
self
):
super
(
TestNGRAPHFillConstantOp1
,
self
).
setUp
()
super
(
TestNGRAPHFillConstantFP64
,
self
).
setUp
()
self
.
attrs
=
{
'shape'
:
[
123
,
92
],
'value'
:
3.8
,
'dtype'
:
6
}
self
.
outputs
=
{
'Out'
:
np
.
full
((
123
,
92
),
3.8
)}
class
TestNGRAPHFillConstant
Op
2
(
TestFillConstantOp2
):
class
TestNGRAPHFillConstant
INT3
2
(
TestFillConstantOp2
):
def
setUp
(
self
):
super
(
TestNGRAPHFillConstant
Op
2
,
self
).
setUp
()
super
(
TestNGRAPHFillConstant
INT3
2
,
self
).
setUp
()
self
.
attrs
=
{
'shape'
:
[
123
,
92
],
'dtype'
:
2
}
self
.
outputs
=
{
'Out'
:
np
.
full
((
123
,
92
),
0
)}
class
TestNGRAPHFillConstantOpWithSelectedRows
(
TestFillConstantOpWithSelectedRows
):
class
TestNGRAPHFillConstantINT64
(
TestFillConstantOp2
):
def
setUp
(
self
):
super
(
TestFillConstantOpWithSelectedRows
,
self
).
setUp
()
super
(
TestNGRAPHFillConstantINT64
,
self
).
setUp
()
self
.
attrs
=
{
'shape'
:
[
123
,
92
],
'dtype'
:
3
}
self
.
outputs
=
{
'Out'
:
np
.
full
((
123
,
92
),
0
)}
if
__name__
==
"__main__"
:
...
...
python/paddle/fluid/tests/unittests/ngraph/test_mean_ngraph_op.py
浏览文件 @
5998d3cc
...
...
@@ -16,12 +16,5 @@ from __future__ import print_function
import
unittest
from
paddle.fluid.tests.unittests.test_mean_op
import
TestMeanOp
class
TestNGRAPHMeanOp
(
TestMeanOp
):
def
setUp
(
self
):
super
(
TestNGRAPHMeanOp
,
self
).
setUp
()
self
.
_cpu_only
=
True
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/ngraph/test_mul_ngraph_op.py
浏览文件 @
5998d3cc
...
...
@@ -15,39 +15,7 @@
from
__future__
import
print_function
import
unittest
import
numpy
as
np
from
paddle.fluid.tests.unittests.op_test
import
OpTest
class
TestNGRAPHMulOp
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"mul"
self
.
dtype
=
np
.
float32
self
.
init_dtype_type
()
self
.
inputs
=
{
'X'
:
np
.
random
.
random
((
2
,
4
)).
astype
(
self
.
dtype
),
'Y'
:
np
.
random
.
random
((
4
,
4
)).
astype
(
self
.
dtype
)
}
self
.
outputs
=
{
'Out'
:
np
.
dot
(
self
.
inputs
[
'X'
],
self
.
inputs
[
'Y'
])}
self
.
_cpu_only
=
True
def
init_dtype_type
(
self
):
pass
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad_normal
(
self
):
self
.
check_grad
([
'X'
,
'Y'
],
'Out'
,
max_relative_error
=
0.5
)
def
test_check_grad_ingore_x
(
self
):
self
.
check_grad
(
[
'Y'
],
'Out'
,
max_relative_error
=
0.5
,
no_grad_set
=
set
(
"X"
))
def
test_check_grad_ingore_y
(
self
):
self
.
check_grad
(
[
'X'
],
'Out'
,
max_relative_error
=
0.5
,
no_grad_set
=
set
(
'Y'
))
from
paddle.fluid.tests.unittests.test_mul_op
import
TestMulOp
,
TestMulOp2
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/ngraph/test_pool2d_ngraph_op.py
浏览文件 @
5998d3cc
...
...
@@ -14,61 +14,25 @@
from
__future__
import
print_function
from
paddle.fluid.tests.unittests.test_pool2d_op
import
TestPool2D_Op
,
TestCase1
,
TestCase2
,
TestCase3
,
TestCase4
,
TestCase5
class
TestNGRAPHPool2D_Op
(
TestPool2D_Op
):
def
setUp
(
self
):
super
(
TestNGRAPHPool2D_Op
,
self
).
setUp
()
self
.
_cpu_only
=
True
def
init_test_case
(
self
):
super
(
TestNGRAPHPool2D_Op
,
self
).
init_test_case
()
class
TestNGRAPHCase1
(
TestCase1
):
def
setUp
(
self
):
super
(
TestNGRAPHCase1
,
self
).
setUp
()
self
.
_cpu_only
=
True
def
init_test_case
(
self
):
super
(
TestNGRAPHCase1
,
self
).
init_test_case
()
import
unittest
class
TestNGRAPHCase2
(
TestCase2
):
def
setUp
(
self
):
super
(
TestNGRAPHCase2
,
self
).
setUp
()
self
.
_cpu_only
=
True
def
init_test_case
(
self
):
super
(
TestNGRAPHCase2
,
self
).
init_test_case
()
class
TestNGRAPHCase3
(
TestCase3
):
def
setUp
(
self
):
super
(
TestNGRAPHCase3
,
self
).
setUp
()
self
.
_cpu_only
=
True
def
init_pool_type
(
self
):
super
(
TestNGRAPHCase3
,
self
).
init_pool_type
()
from
paddle.fluid.tests.unittests.test_pool2d_op
import
TestPool2D_Op
,
TestCase1
,
TestCase2
,
TestCase3
,
TestCase4
,
TestCase5
class
TestNGRAPHC
ase4
(
TestCase4
):
class
TestNGRAPHC
eilMode
(
TestCase1
):
def
setUp
(
self
):
super
(
TestNGRAPHCase4
,
self
).
setUp
()
self
.
_cpu_only
=
True
super
(
TestNGRAPHCeilMode
,
self
).
setUp
()
def
init_
pool_typ
e
(
self
):
s
uper
(
TestNGRAPHCase4
,
self
).
init_pool_type
()
def
init_
ceil_mod
e
(
self
):
s
elf
.
ceil_mode
=
True
class
TestNGRAPH
Case5
(
TestCase5
):
class
TestNGRAPH
Adaptive
(
TestCase1
):
def
setUp
(
self
):
super
(
TestNGRAPHCase5
,
self
).
setUp
()
self
.
_cpu_only
=
True
super
(
TestNGRAPHAdaptive
,
self
).
setUp
()
def
init_
pool_typ
e
(
self
):
s
uper
(
TestNGRAPHCase5
,
self
).
init_pool_type
()
def
init_
adaptiv
e
(
self
):
s
elf
.
adaptive
=
True
if
__name__
==
'__main__'
:
...
...
python/paddle/fluid/tests/unittests/ngraph/test_scale_ngraph_op.py
浏览文件 @
5998d3cc
...
...
@@ -15,24 +15,5 @@ from __future__ import print_function
import
unittest
from
paddle.fluid.tests.unittests.test_scale_op
import
TestScaleOp
,
TestScaleOpSelectedRows
class
TestNGRAPHScaleOp
(
TestScaleOp
):
def
setUp
(
self
):
super
(
TestNGRAPHScaleOp
,
self
).
setUp
()
self
.
_cpu_only
=
True
def
init_dtype_type
(
self
):
pass
class
TestNGRAPHScaleOpSelectedRows
(
TestScaleOpSelectedRows
):
def
setUp
(
self
):
super
(
TestNGRAPHScaleOpSelectedRows
,
self
).
setUp
()
self
.
_cpu_only
=
True
def
init_dtype_type
(
self
):
pass
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/ngraph/test_softmax_ngraph_op.py
浏览文件 @
5998d3cc
...
...
@@ -16,11 +16,5 @@ from __future__ import print_function
import
unittest
from
paddle.fluid.tests.unittests.test_softmax_op
import
TestSoftmaxOp
class
TestSoftmaxNGRAPHOp
(
TestSoftmaxOp
):
def
setUp
(
self
):
super
(
TestSoftmaxNGRAPHOp
,
self
).
setUp
()
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/ngraph/test_top_k_ngraph_op.py
浏览文件 @
5998d3cc
...
...
@@ -16,30 +16,5 @@ from __future__ import print_function
import
unittest
from
paddle.fluid.tests.unittests.test_top_k_op
import
TestTopkOp
,
TestTopkOp3d
,
TestTopkOp2
,
TestTopkOp3
,
TestTopkOp4
class
TestNGRAPHTopkOp
(
TestTopkOp
):
def
setUp
(
self
):
super
(
TestNGRAPHTopkOp
,
self
).
setUp
()
self
.
_cpu_only
=
True
class
TestNGRAPHTopkOp2
(
TestTopkOp2
):
def
setUp
(
self
):
super
(
TestNGRAPHTopkOp2
,
self
).
setUp
()
self
.
_cpu_only
=
True
class
TestNGRAPHTopkOp3
(
TestTopkOp3
):
def
setUp
(
self
):
super
(
TestNGRAPHTopkOp3
,
self
).
setUp
()
self
.
_cpu_only
=
True
class
TestNGRAPHTopkOp4
(
TestTopkOp4
):
def
setUp
(
self
):
super
(
TestNGRAPHTopkOp4
,
self
).
setUp
()
self
.
_cpu_only
=
True
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/op_test.py
浏览文件 @
5998d3cc
...
...
@@ -14,6 +14,7 @@
from
__future__
import
print_function
import
os
import
unittest
import
numpy
as
np
import
random
...
...
@@ -374,6 +375,9 @@ class OpTest(unittest.TestCase):
return
[]
places
=
[
fluid
.
CPUPlace
()]
cpu_only
=
self
.
_cpu_only
if
hasattr
(
self
,
'_cpu_only'
)
else
False
use_ngraph
=
bool
(
os
.
getenv
(
"FLAGS_use_ngraph"
,
False
))
if
use_ngraph
:
cpu_only
=
True
if
core
.
is_compiled_with_cuda
()
and
core
.
op_support_gpu
(
self
.
op_type
)
\
and
not
cpu_only
:
places
.
append
(
core
.
CUDAPlace
(
0
))
...
...
python/paddle/fluid/tests/unittests/parallel_executor_test_base.py
浏览文件 @
5998d3cc
...
...
@@ -79,7 +79,7 @@ class TestParallelExecutorBase(unittest.TestCase):
if
use_reduce
else
fluid
.
BuildStrategy
.
ReduceStrategy
.
AllReduce
build_strategy
.
fuse_elewise_add_act_ops
=
fuse_elewise_add_act_ops
build_strategy
.
fuse_relu_depthwise_conv
=
fuse_relu_depthwise_conv
build_strategy
.
memory_optimize
=
use_ir_memory_optimize
build_strategy
.
memory_optimize
=
False
if
memory_opt
else
use_ir_memory_optimize
# python memory optimization is conflict with inplace pass.
# Use ir graph memory optimization after inplace pass is the correct way.
build_strategy
.
enable_inplace
=
False
if
memory_opt
else
enable_inplace
...
...
python/paddle/fluid/tests/unittests/test_base_layer.py
0 → 100644
浏览文件 @
5998d3cc
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
numpy
as
np
import
paddle.fluid
as
fluid
from
paddle.fluid.layer_helper
import
LayerHelper
class L1(fluid.imperative.Layer):
    """Leaf layer holding two 2x2 float32 parameters initialised to 0.1."""

    def __init__(self):
        super(L1, self).__init__()
        constant_init = fluid.initializer.Constant(value=0.1)
        self._helper = LayerHelper(
            'MyLayer', param_attr=fluid.ParamAttr(initializer=constant_init))

        # Create w1 then w2 with identical settings; the creation order is
        # kept because the test asserts on the generated parameter names.
        for attr_name in ('w1', 'w2'):
            param = self._helper.create_parameter(
                attr=self._helper.param_attr,
                shape=[2, 2],
                dtype='float32',
                is_bias=False)
            setattr(self, attr_name, param)

    def forward(self):
        """Return the sum of the two parameters."""
        total = self.w1 + self.w2
        return total
class L2(fluid.imperative.Layer):
    """Second-level layer composed of two ``L1`` sublayers."""

    def __init__(self):
        super(L2, self).__init__()
        self.layer1 = L1()
        self.layer2 = L1()

    def forward(self):
        """Return the sum of both sublayers' outputs."""
        first = self.layer1()
        second = self.layer2()
        return first + second
class L3(fluid.imperative.Layer):
    """Third-level layer composed of two ``L2`` sublayers."""

    def __init__(self):
        super(L3, self).__init__()
        self.layer1 = L2()
        self.layer2 = L2()

    def forward(self):
        """Return the sum of both sublayers' outputs."""
        first = self.layer1()
        second = self.layer2()
        return first + second
class TestBaseLayer(unittest.TestCase):
    """Checks parameter naming and forward results of nested imperative layers."""

    def test_one_level(self):
        """A single ``L1`` names its parameters in order and outputs 0.1 + 0.1."""
        with fluid.imperative.guard():
            layer = L1()
            result = layer()
            self.assertEqual(layer.w1.name, "MyLayer_0.w_0")
            self.assertEqual(layer.w2.name, "MyLayer_0.w_1")
            expected = 0.2 * np.ones([2, 2])
            self.assertTrue(np.allclose(result._numpy(), expected))

    def test_three_level(self):
        """``L3`` sums four ``L1`` leaves, i.e. eight 0.1-valued parameters."""
        with fluid.imperative.guard():
            layer = L3()
            result = layer()
            expected = 0.8 * np.ones([2, 2])
            self.assertTrue(np.allclose(result._numpy(), expected))
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_dist_transpiler.py
浏览文件 @
5998d3cc
...
...
@@ -22,6 +22,9 @@ import six
import
unittest
import
numpy
as
np
import
gc
gc
.
set_debug
(
gc
.
DEBUG_COLLECTABLE
)
import
paddle.fluid
as
fluid
...
...
@@ -99,6 +102,12 @@ class TranspilerTest(unittest.TestCase):
with
fluid
.
unique_name
.
guard
():
with
fluid
.
program_guard
(
main
,
startup
):
self
.
transpiler_test_impl
()
# NOTE: run gc.collect to eliminate pybind side objects to
# prevent random double-deallocate when inherited in python.
del
self
.
transpiler
del
main
del
startup
gc
.
collect
()
class
TestBasicModel
(
TranspilerTest
):
...
...
@@ -797,6 +806,7 @@ class TestNCCL2Transpile(TranspilerTest):
print
([
op
.
type
for
op
in
startup
.
global_block
().
ops
])
self
.
assertEqual
(
startup
.
global_block
().
ops
[
-
1
].
type
,
"gen_nccl_id"
)
self
.
assertIsNotNone
(
startup
.
global_block
().
vars
.
get
(
"NCCLID"
))
gc
.
collect
()
else
:
pass
...
...
python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py
浏览文件 @
5998d3cc
...
...
@@ -121,6 +121,8 @@ class TestMNIST(TestParallelExecutorBase):
regularization
=
fluid
.
regularizer
.
L2Decay
(
1e-6
))
return
optimizer
# NOTE(dzh):
# need to make it compatible with elewise fuse act
not_fuse_op_first_loss
,
not_fuse_op_last_loss
=
self
.
check_network_convergence
(
model
,
feed_dict
=
{
"image"
:
img
,
...
...
@@ -128,6 +130,7 @@ class TestMNIST(TestParallelExecutorBase):
use_cuda
=
use_cuda
,
fuse_elewise_add_act_ops
=
False
,
memory_opt
=
False
,
use_ir_memory_optimize
=
False
,
optimizer
=
_optimizer
)
fuse_op_first_loss
,
fuse_op_last_loss
=
self
.
check_network_convergence
(
model
,
...
...
@@ -136,6 +139,7 @@ class TestMNIST(TestParallelExecutorBase):
use_cuda
=
use_cuda
,
fuse_elewise_add_act_ops
=
True
,
memory_opt
=
False
,
use_ir_memory_optimize
=
False
,
optimizer
=
_optimizer
)
for
loss
in
zip
(
not_fuse_op_first_loss
,
fuse_op_first_loss
):
...
...
python/paddle/fluid/tests/unittests/test_imperative.py
浏览文件 @
5998d3cc
...
...
@@ -333,6 +333,18 @@ class TestImperative(unittest.TestCase):
self
.
assertTrue
(
np
.
allclose
(
dy_out
,
static_out
))
self
.
assertTrue
(
np
.
allclose
(
dy_grad
,
static_grad
))
params
=
mlp
.
parameters
(
True
)
self
.
assertEqual
(
"FC_0.w_0"
,
params
[
0
].
name
)
self
.
assertEqual
(
"FC_0.b_0"
,
params
[
1
].
name
)
self
.
assertEqual
(
"FC_1.w_0"
,
params
[
2
].
name
)
self
.
assertEqual
(
"FC_1.b_0"
,
params
[
3
].
name
)
self
.
assertEqual
(
len
(
params
),
4
)
sublayers
=
mlp
.
sublayers
(
True
)
self
.
assertEqual
(
mlp
.
_fc1
,
sublayers
[
0
])
self
.
assertEqual
(
mlp
.
_fc2
,
sublayers
[
1
])
self
.
assertEqual
(
len
(
sublayers
),
2
)
def
test_rnn
(
self
):
np_inp
=
np
.
array
([[
1.0
,
2.0
,
3.0
],
[
4.0
,
5.0
,
6.0
],
[
7.0
,
8.0
,
9.0
],
[
10.0
,
11.0
,
12.0
]])
...
...
python/paddle/fluid/tests/unittests/test_imperative_gan.py
浏览文件 @
5998d3cc
...
...
@@ -33,9 +33,6 @@ class Discriminator(fluid.imperative.Layer):
self
.
_fc1
=
FC
(
size
=
32
,
act
=
'elu'
,
name
=
"d_fc1"
)
self
.
_fc2
=
FC
(
size
=
1
,
name
=
"d_fc2"
)
def
parameters
(
self
):
return
self
.
_fc1
.
parameters
()
+
self
.
_fc2
.
parameters
()
def
forward
(
self
,
inputs
):
x
=
self
.
_fc1
(
inputs
)
return
self
.
_fc2
(
x
)
...
...
@@ -48,10 +45,6 @@ class Generator(fluid.imperative.Layer):
self
.
_fc2
=
FC
(
size
=
64
,
act
=
'elu'
,
name
=
"g_fc2"
)
self
.
_fc3
=
FC
(
size
=
1
,
name
=
"g_fc3"
)
def
parameters
(
self
):
return
self
.
_fc1
.
parameters
()
+
self
.
_fc2
.
parameters
(
)
+
self
.
_fc3
.
parameters
()
def
forward
(
self
,
inputs
):
x
=
self
.
_fc1
(
inputs
)
x
=
self
.
_fc2
(
x
)
...
...
python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
浏览文件 @
5998d3cc
...
...
@@ -75,16 +75,6 @@ class SimpleLSTMRNN(fluid.imperative.Layer):
self
.
hidden_array
.
append
(
pre_hidden
)
self
.
cell_array
.
append
(
pre_cell
)
def
parameters
(
self
):
parameters
=
list
()
for
param
in
self
.
weight_1_arr
:
parameters
.
append
(
param
)
for
param
in
self
.
weight_2_arr
:
parameters
.
append
(
param
)
for
bias
in
self
.
bias_arr
:
parameters
.
append
(
bias
)
return
parameters
def
forward
(
self
,
input_embedding
,
init_hidden
=
None
,
init_cell
=
None
):
res
=
[]
for
index
in
range
(
self
.
_num_steps
):
...
...
@@ -177,12 +167,6 @@ class PtbModel(fluid.imperative.Layer):
def
_build_once
(
self
,
input
,
label
,
init_hidden
,
init_cell
):
pass
def
parameters
(
self
):
parameters
=
self
.
simple_lstm_rnn
.
parameters
()
+
[
self
.
softmax_weight
,
self
.
softmax_bias
]
+
self
.
embedding
.
parameters
()
return
parameters
def
forward
(
self
,
input
,
label
,
init_hidden
,
init_cell
):
init_h
=
fluid
.
layers
.
reshape
(
...
...
python/paddle/fluid/tests/unittests/test_imperative_resnet.py
浏览文件 @
5998d3cc
...
...
@@ -21,7 +21,6 @@ import paddle
import
paddle.fluid
as
fluid
from
paddle.fluid
import
core
from
paddle.fluid.layer_helper
import
LayerHelper
from
paddle.fluid.optimizer
import
SGDOptimizer
from
paddle.fluid.imperative.nn
import
Conv2D
,
Pool2D
,
BatchNorm
,
FC
from
paddle.fluid.imperative.base
import
to_variable
from
test_imperative_base
import
new_program_scope
...
...
@@ -173,11 +172,13 @@ class ResNet(fluid.imperative.Layer):
for
block
in
range
(
len
(
depth
)):
shortcut
=
False
for
i
in
range
(
depth
[
block
]):
bottleneck_block
=
BottleneckBlock
(
bottleneck_block
=
self
.
add_sublayer
(
'bb_%d_%d'
%
(
block
,
i
),
BottleneckBlock
(
num_channels
=
num_channels
,
num_filters
=
num_filters
[
block
],
stride
=
2
if
i
==
0
and
block
!=
0
else
1
,
shortcut
=
shortcut
)
shortcut
=
shortcut
)
)
num_channels
=
bottleneck_block
.
_num_channels_out
self
.
bottleneck_block_list
.
append
(
bottleneck_block
)
shortcut
=
True
...
...
@@ -223,8 +224,7 @@ class TestImperativeResnet(unittest.TestCase):
batch_size
=
batch_size
)
dy_param_init_value
=
{}
for
param
in
fluid
.
default_main_program
().
global_block
(
).
all_parameters
():
for
param
in
resnet
.
parameters
():
dy_param_init_value
[
param
.
name
]
=
param
.
_numpy
()
for
batch_id
,
data
in
enumerate
(
train_reader
()):
...
...
@@ -247,16 +247,14 @@ class TestImperativeResnet(unittest.TestCase):
dy_out
=
avg_loss
.
_numpy
()
if
batch_id
==
0
:
for
param
in
fluid
.
default_main_program
().
global_block
(
).
all_parameters
():
for
param
in
resnet
.
parameters
():
if
param
.
name
not
in
dy_param_init_value
:
dy_param_init_value
[
param
.
name
]
=
param
.
_numpy
()
avg_loss
.
_backward
()
dy_grad_value
=
{}
for
param
in
fluid
.
default_main_program
().
global_block
(
).
all_parameters
():
for
param
in
resnet
.
parameters
():
if
not
param
.
stop_gradient
:
np_array
=
np
.
array
(
param
.
_ivar
.
_grad_ivar
().
value
()
.
get_tensor
())
...
...
@@ -267,8 +265,7 @@ class TestImperativeResnet(unittest.TestCase):
resnet
.
clear_gradients
()
dy_param_value
=
{}
for
param
in
fluid
.
default_main_program
().
global_block
(
).
all_parameters
():
for
param
in
resnet
.
parameters
():
dy_param_value
[
param
.
name
]
=
param
.
_numpy
()
with
new_program_scope
():
...
...
@@ -349,6 +346,7 @@ class TestImperativeResnet(unittest.TestCase):
self
.
assertTrue
(
np
.
allclose
(
static_out
,
dy_out
))
self
.
assertEqual
(
len
(
dy_param_init_value
),
len
(
static_param_init_value
))
for
key
,
value
in
six
.
iteritems
(
static_param_init_value
):
self
.
assertTrue
(
np
.
allclose
(
value
,
dy_param_init_value
[
key
]))
self
.
assertTrue
(
np
.
isfinite
(
value
.
all
()))
...
...
python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py
0 → 100644
浏览文件 @
5998d3cc
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
unittest
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
os
.
environ
[
'FLAGS_eager_delete_tensor_gb'
]
=
"0.0"
os
.
environ
[
'RECORDIO_FILENAME'
]
=
'/tmp/ir_memory_optimize_transformer.wmt16.recordio'
from
test_parallel_executor_transformer
import
TestTransformer
from
test_parallel_executor_transformer
import
transformer
# NOTE(dzhwinter): test different strategy collisions.
# enable the eager delete tensor strategy by default.
class
TestTransformerWithIR
(
TestTransformer
):
def
test_main
(
self
):
if
core
.
is_compiled_with_cuda
():
# check python transpiler
self
.
check_network_convergence
(
transformer
,
use_cuda
=
True
,
memory_opt
=
True
,
use_ir_memory_optimize
=
False
)
# check IR memory optimize
self
.
check_network_convergence
(
transformer
,
use_cuda
=
True
,
memory_opt
=
False
,
use_ir_memory_optimize
=
True
)
if
__name__
==
'__main__'
:
unittest
.
main
()
python/requirements.txt
浏览文件 @
5998d3cc
requests==2.9.2
numpy>=1.12
protobuf
==3.1
protobuf
>=3.6
recordio>=0.1.0
matplotlib==2.2.3 # TODO: let python3 paddlepaddle package use latest matplotlib
rarfile
...
...
tools/manylinux1/Dockerfile.x64
浏览文件 @
5998d3cc
...
...
@@ -31,10 +31,10 @@ RUN wget --no-check-certificate -qO- https://storage.googleapis.com/golang/go1.8
ENV GOROOT=/usr/local/go GOPATH=/root/gopath
ENV PATH=${GOROOT}/bin:${GOPATH}/bin:${PATH}
# protobuf 3.
1.0
RUN cd /opt && wget -q --no-check-certificate https://github.com/google/protobuf/releases/download/v3.
1.0/protobuf-cpp-3.1.0
.tar.gz && \
tar xzf protobuf-cpp-3.
1.0
.tar.gz && \
cd protobuf-3.
1.0 && ./configure && make -j4 && make install && cd .. && rm -f protobuf-cpp-3.1.0
.tar.gz
# protobuf 3.
6.1
RUN cd /opt && wget -q --no-check-certificate https://github.com/google/protobuf/releases/download/v3.
6.1/protobuf-cpp-3.6.1
.tar.gz && \
tar xzf protobuf-cpp-3.
6.1
.tar.gz && \
cd protobuf-3.
6.1 && ./configure && make -j4 && make install && cd .. && rm -f protobuf-cpp-3.6.1
.tar.gz
RUN wget https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/python/requirements.txt -O /root/requirements.txt
...
...
tools/manylinux1/build_scripts/build.sh
浏览文件 @
5998d3cc
...
...
@@ -17,7 +17,7 @@ OPENSSL_ROOT=openssl-1.1.0i
OPENSSL_HASH
=
ebbfc844a8c8cc0ea5dc10b86c9ce97f401837f3fa08c17b2cdadc118253cf99
EPEL_RPM_HASH
=
e5ed9ecf22d0c4279e92075a64c757ad2b38049bcf5c16c4f2b75d5f6860dc0d
DEVTOOLS_HASH
=
a8ebeb4bed624700f727179e6ef771dafe47651131a00a78b342251415646acc
PATCHELF_HASH
=
d9afdff4baeacfbc64861454f368b7f2c15c44d245293f7587bbf726bfe722fb
PATCHELF_HASH
=
f2aa40a6148cb3b0ca807a1bf836b081793e55ec9e5540a5356d800132be7e0a
CURL_ROOT
=
curl-7.49.1
CURL_HASH
=
eb63cec4bef692eab9db459033f409533e6d10e20942f4b060b32819e81885f1
AUTOCONF_ROOT
=
autoconf-2.69
...
...
@@ -107,11 +107,11 @@ curl-config --features
rm
-rf
/usr/local/ssl
# Install patchelf (latest with unreleased bug fixes)
curl
-sLO
http
://nipy.bic.berkeley.edu/manylinux/patchelf-0.9njs2
.tar.gz
check_sha256sum patchelf-0.9
njs2
.tar.gz
$PATCHELF_HASH
tar
-xzf
patchelf-0.9
njs2
.tar.gz
(
cd
patchelf-0.9
njs2
&&
./configure
&&
make
&&
make
install
)
rm
-rf
patchelf-0.9
njs2.tar.gz patchelf-0.9njs2
curl
-sLO
http
s://nixos.org/releases/patchelf/patchelf-0.9/patchelf-0.9
.tar.gz
check_sha256sum patchelf-0.9.tar.gz
$PATCHELF_HASH
tar
-xzf
patchelf-0.9.tar.gz
(
cd
patchelf-0.9
&&
./configure
&&
make
&&
make
install
)
rm
-rf
patchelf-0.9
.tar.gz patchelf-0.9
# Install latest pypi release of auditwheel
LD_LIBRARY_PATH
=
"
${
ORIGINAL_LD_LIBRARY_PATH
}
:
$(
dirname
${
PY35_BIN
}
)
/lib"
$PY35_BIN
/pip
install
auditwheel
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录