Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
32146857
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
32146857
编写于
3月 29, 2019
作者:
L
lujun
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into move-code
上级
d980ba19
d4f63d82
变更
133
显示空白变更内容
内联
并排
Showing
133 changed file
with
2315 addition
and
1160 deletion
+2315
-1160
paddle/fluid/API.spec
paddle/fluid/API.spec
+1
-1
paddle/fluid/framework/data_layout_transform.cc
paddle/fluid/framework/data_layout_transform.cc
+16
-7
paddle/fluid/framework/data_transform.cc
paddle/fluid/framework/data_transform.cc
+6
-24
paddle/fluid/framework/details/CMakeLists.txt
paddle/fluid/framework/details/CMakeLists.txt
+7
-2
paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.cc
...framework/details/alloc_continuous_space_for_grad_pass.cc
+29
-19
paddle/fluid/framework/details/broadcast_op_handle.cc
paddle/fluid/framework/details/broadcast_op_handle.cc
+5
-8
paddle/fluid/framework/details/build_strategy.cc
paddle/fluid/framework/details/build_strategy.cc
+39
-13
paddle/fluid/framework/details/build_strategy.h
paddle/fluid/framework/details/build_strategy.h
+2
-1
paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc
...uid/framework/details/fast_threaded_ssa_graph_executor.cc
+3
-2
paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h
...luid/framework/details/fast_threaded_ssa_graph_executor.h
+12
-7
paddle/fluid/framework/details/fuse_adam_op_pass.cc
paddle/fluid/framework/details/fuse_adam_op_pass.cc
+199
-0
paddle/fluid/framework/details/fuse_adam_op_pass.h
paddle/fluid/framework/details/fuse_adam_op_pass.h
+55
-0
paddle/fluid/framework/details/fuse_optimizer_op_pass.cc
paddle/fluid/framework/details/fuse_optimizer_op_pass.cc
+240
-0
paddle/fluid/framework/details/fuse_optimizer_op_pass.h
paddle/fluid/framework/details/fuse_optimizer_op_pass.h
+75
-0
paddle/fluid/framework/details/fuse_sgd_op_pass.cc
paddle/fluid/framework/details/fuse_sgd_op_pass.cc
+74
-0
paddle/fluid/framework/details/fuse_sgd_op_pass.h
paddle/fluid/framework/details/fuse_sgd_op_pass.h
+50
-0
paddle/fluid/framework/details/fused_all_reduce_op_handle.cc
paddle/fluid/framework/details/fused_all_reduce_op_handle.cc
+23
-6
paddle/fluid/framework/details/multi_devices_graph_pass.h
paddle/fluid/framework/details/multi_devices_graph_pass.h
+4
-1
paddle/fluid/framework/details/multi_devices_helper.h
paddle/fluid/framework/details/multi_devices_helper.h
+13
-13
paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
...le/fluid/framework/details/threaded_ssa_graph_executor.cc
+4
-4
paddle/fluid/framework/details/threaded_ssa_graph_executor.h
paddle/fluid/framework/details/threaded_ssa_graph_executor.h
+9
-10
paddle/fluid/framework/operator.h
paddle/fluid/framework/operator.h
+3
-0
paddle/fluid/framework/tensor.cc
paddle/fluid/framework/tensor.cc
+1
-1
paddle/fluid/framework/tensor.h
paddle/fluid/framework/tensor.h
+10
-34
paddle/fluid/framework/tensor_util.cc
paddle/fluid/framework/tensor_util.cc
+0
-5
paddle/fluid/memory/allocation/CMakeLists.txt
paddle/fluid/memory/allocation/CMakeLists.txt
+16
-7
paddle/fluid/memory/allocation/aligned_allocator.h
paddle/fluid/memory/allocation/aligned_allocator.h
+0
-2
paddle/fluid/memory/allocation/allocator.cc
paddle/fluid/memory/allocation/allocator.cc
+3
-11
paddle/fluid/memory/allocation/allocator.h
paddle/fluid/memory/allocation/allocator.h
+6
-66
paddle/fluid/memory/allocation/allocator_facade.cc
paddle/fluid/memory/allocation/allocator_facade.cc
+16
-32
paddle/fluid/memory/allocation/allocator_strategy.cc
paddle/fluid/memory/allocation/allocator_strategy.cc
+4
-10
paddle/fluid/memory/allocation/best_fit_allocator.cc
paddle/fluid/memory/allocation/best_fit_allocator.cc
+1
-1
paddle/fluid/memory/allocation/best_fit_allocator.h
paddle/fluid/memory/allocation/best_fit_allocator.h
+1
-1
paddle/fluid/memory/allocation/buffered_allocator.cc
paddle/fluid/memory/allocation/buffered_allocator.cc
+12
-10
paddle/fluid/memory/allocation/buffered_allocator.h
paddle/fluid/memory/allocation/buffered_allocator.h
+3
-3
paddle/fluid/memory/allocation/buffered_allocator_test.cc
paddle/fluid/memory/allocation/buffered_allocator_test.cc
+2
-1
paddle/fluid/memory/allocation/cpu_allocator.cc
paddle/fluid/memory/allocation/cpu_allocator.cc
+13
-15
paddle/fluid/memory/allocation/cpu_allocator.h
paddle/fluid/memory/allocation/cpu_allocator.h
+8
-2
paddle/fluid/memory/allocation/cuda_allocator.cc
paddle/fluid/memory/allocation/cuda_allocator.cc
+5
-5
paddle/fluid/memory/allocation/cuda_allocator.h
paddle/fluid/memory/allocation/cuda_allocator.h
+8
-1
paddle/fluid/memory/allocation/legacy_allocator.cc
paddle/fluid/memory/allocation/legacy_allocator.cc
+25
-27
paddle/fluid/memory/allocation/legacy_allocator.h
paddle/fluid/memory/allocation/legacy_allocator.h
+1
-1
paddle/fluid/memory/allocation/locked_allocator.cc
paddle/fluid/memory/allocation/locked_allocator.cc
+10
-9
paddle/fluid/memory/allocation/locked_allocator.h
paddle/fluid/memory/allocation/locked_allocator.h
+3
-3
paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc
...memory/allocation/naive_best_fit_allocator_facade_test.cc
+0
-91
paddle/fluid/memory/allocation/pinned_allocator.cc
paddle/fluid/memory/allocation/pinned_allocator.cc
+7
-2
paddle/fluid/memory/allocation/pinned_allocator.h
paddle/fluid/memory/allocation/pinned_allocator.h
+7
-1
paddle/fluid/memory/allocation/retry_allocator.cc
paddle/fluid/memory/allocation/retry_allocator.cc
+14
-4
paddle/fluid/memory/allocation/retry_allocator.h
paddle/fluid/memory/allocation/retry_allocator.h
+16
-7
paddle/fluid/memory/allocation/zero_size_allocator.cc
paddle/fluid/memory/allocation/zero_size_allocator.cc
+1
-10
paddle/fluid/memory/allocation/zero_size_allocator.h
paddle/fluid/memory/allocation/zero_size_allocator.h
+6
-1
paddle/fluid/operators/alloc_continuous_space_op.cc
paddle/fluid/operators/alloc_continuous_space_op.cc
+35
-10
paddle/fluid/operators/bpr_loss_op.cc
paddle/fluid/operators/bpr_loss_op.cc
+19
-1
paddle/fluid/operators/detection/roi_perspective_transform_op.cc
...fluid/operators/detection/roi_perspective_transform_op.cc
+20
-1
paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc
...operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc
+13
-6
paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
+8
-2
paddle/fluid/operators/im2sequence_op.cc
paddle/fluid/operators/im2sequence_op.cc
+18
-1
paddle/fluid/operators/interpolate_op.cc
paddle/fluid/operators/interpolate_op.cc
+32
-6
paddle/fluid/operators/l1_norm_op.cc
paddle/fluid/operators/l1_norm_op.cc
+18
-1
paddle/fluid/operators/label_smooth_op.cc
paddle/fluid/operators/label_smooth_op.cc
+19
-5
paddle/fluid/operators/linear_chain_crf_op.cc
paddle/fluid/operators/linear_chain_crf_op.cc
+36
-3
paddle/fluid/operators/log_loss_op.cc
paddle/fluid/operators/log_loss_op.cc
+19
-1
paddle/fluid/operators/lstm_op.cc
paddle/fluid/operators/lstm_op.cc
+41
-1
paddle/fluid/operators/margin_rank_loss_op.cc
paddle/fluid/operators/margin_rank_loss_op.cc
+20
-3
paddle/fluid/operators/mean_op.cc
paddle/fluid/operators/mean_op.cc
+9
-2
paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
+17
-7
paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc
paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc
+25
-11
paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
+2
-1
paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
+24
-23
paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc
paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc
+2
-1
paddle/fluid/operators/mkldnn/gaussian_random_mkldnn_op.cc
paddle/fluid/operators/mkldnn/gaussian_random_mkldnn_op.cc
+2
-6
paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc
paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc
+13
-8
paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
+0
-8
paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
+5
-4
paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc
paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc
+5
-21
paddle/fluid/operators/multiplex_op.cc
paddle/fluid/operators/multiplex_op.cc
+28
-7
paddle/fluid/operators/multiplex_op.cu
paddle/fluid/operators/multiplex_op.cu
+8
-3
paddle/fluid/operators/multiplex_op.h
paddle/fluid/operators/multiplex_op.h
+8
-3
paddle/fluid/operators/pad_op.cc
paddle/fluid/operators/pad_op.cc
+14
-7
paddle/fluid/operators/psroi_pool_op.cc
paddle/fluid/operators/psroi_pool_op.cc
+19
-1
paddle/fluid/operators/rank_loss_op.cc
paddle/fluid/operators/rank_loss_op.cc
+20
-0
paddle/fluid/operators/roi_align_op.cc
paddle/fluid/operators/roi_align_op.cc
+19
-1
paddle/fluid/operators/roi_pool_op.cc
paddle/fluid/operators/roi_pool_op.cc
+20
-1
paddle/fluid/operators/scatter_op.cc
paddle/fluid/operators/scatter_op.cc
+30
-5
paddle/fluid/operators/shuffle_channel_op.cc
paddle/fluid/operators/shuffle_channel_op.cc
+18
-2
paddle/fluid/platform/mkldnn_reuse.h
paddle/fluid/platform/mkldnn_reuse.h
+33
-40
paddle/fluid/platform/mkldnn_utils.h
paddle/fluid/platform/mkldnn_utils.h
+0
-69
paddle/fluid/platform/temporary_allocator.cc
paddle/fluid/platform/temporary_allocator.cc
+19
-9
paddle/fluid/platform/temporary_allocator.h
paddle/fluid/platform/temporary_allocator.h
+11
-3
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+9
-1
paddle/fluid/string/printf.h
paddle/fluid/string/printf.h
+4
-2
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+2
-2
python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
...ddle/fluid/contrib/slim/quantization/quantization_pass.py
+65
-55
python/paddle/fluid/contrib/slim/quantization/quantization_strategy.py
.../fluid/contrib/slim/quantization/quantization_strategy.py
+11
-5
python/paddle/fluid/contrib/slim/tests/quantization/compress.yaml
...addle/fluid/contrib/slim/tests/quantization/compress.yaml
+2
-0
python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py
...paddle/fluid/contrib/slim/tests/test_quantization_pass.py
+0
-3
python/paddle/fluid/dygraph/__init__.py
python/paddle/fluid/dygraph/__init__.py
+0
-0
python/paddle/fluid/dygraph/base.py
python/paddle/fluid/dygraph/base.py
+4
-4
python/paddle/fluid/dygraph/checkpoint.py
python/paddle/fluid/dygraph/checkpoint.py
+5
-5
python/paddle/fluid/dygraph/layer_object_helper.py
python/paddle/fluid/dygraph/layer_object_helper.py
+1
-1
python/paddle/fluid/dygraph/layers.py
python/paddle/fluid/dygraph/layers.py
+1
-1
python/paddle/fluid/dygraph/nn.py
python/paddle/fluid/dygraph/nn.py
+4
-4
python/paddle/fluid/dygraph/profiler.py
python/paddle/fluid/dygraph/profiler.py
+0
-0
python/paddle/fluid/dygraph/tracer.py
python/paddle/fluid/dygraph/tracer.py
+2
-2
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+54
-86
python/paddle/fluid/initializer.py
python/paddle/fluid/initializer.py
+8
-8
python/paddle/fluid/install_check.py
python/paddle/fluid/install_check.py
+1
-1
python/paddle/fluid/layer_helper.py
python/paddle/fluid/layer_helper.py
+3
-3
python/paddle/fluid/layer_helper_base.py
python/paddle/fluid/layer_helper_base.py
+6
-6
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+51
-12
python/paddle/fluid/layers/tensor.py
python/paddle/fluid/layers/tensor.py
+0
-1
python/paddle/fluid/optimizer.py
python/paddle/fluid/optimizer.py
+3
-4
python/paddle/fluid/tests/unittests/op_test.py
python/paddle/fluid/tests/unittests/op_test.py
+29
-23
python/paddle/fluid/tests/unittests/parallel_executor_test_base.py
...ddle/fluid/tests/unittests/parallel_executor_test_base.py
+2
-0
python/paddle/fluid/tests/unittests/test_alloc_continuous_space_op.py
...e/fluid/tests/unittests/test_alloc_continuous_space_op.py
+33
-10
python/paddle/fluid/tests/unittests/test_base_layer.py
python/paddle/fluid/tests/unittests/test_base_layer.py
+5
-5
python/paddle/fluid/tests/unittests/test_fuse_optimizer_pass.py
.../paddle/fluid/tests/unittests/test_fuse_optimizer_pass.py
+135
-0
python/paddle/fluid/tests/unittests/test_gru_op.py
python/paddle/fluid/tests/unittests/test_gru_op.py
+1
-1
python/paddle/fluid/tests/unittests/test_imperative_basic.py
python/paddle/fluid/tests/unittests/test_imperative_basic.py
+24
-24
python/paddle/fluid/tests/unittests/test_imperative_checkpoint.py
...addle/fluid/tests/unittests/test_imperative_checkpoint.py
+8
-8
python/paddle/fluid/tests/unittests/test_imperative_deepcf.py
...on/paddle/fluid/tests/unittests/test_imperative_deepcf.py
+14
-14
python/paddle/fluid/tests/unittests/test_imperative_gan.py
python/paddle/fluid/tests/unittests/test_imperative_gan.py
+6
-6
python/paddle/fluid/tests/unittests/test_imperative_gnn.py
python/paddle/fluid/tests/unittests/test_imperative_gnn.py
+6
-6
python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
...paddle/fluid/tests/unittests/test_imperative_optimizer.py
+6
-6
python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
...n/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
+6
-6
python/paddle/fluid/tests/unittests/test_imperative_resnet.py
...on/paddle/fluid/tests/unittests/test_imperative_resnet.py
+8
-8
python/paddle/fluid/tests/unittests/test_imperative_transformer.py
...ddle/fluid/tests/unittests/test_imperative_transformer.py
+3
-3
python/paddle/fluid/tests/unittests/test_layers.py
python/paddle/fluid/tests/unittests/test_layers.py
+3
-3
python/paddle/fluid/tests/unittests/test_parallel_executor_crf.py
...addle/fluid/tests/unittests/test_parallel_executor_crf.py
+61
-54
python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py
...e/fluid/tests/unittests/test_parallel_executor_dry_run.py
+9
-8
python/paddle/fluid/tests/unittests/test_variable.py
python/paddle/fluid/tests/unittests/test_variable.py
+1
-2
python/setup.py.in
python/setup.py.in
+1
-1
tools/print_signatures.py
tools/print_signatures.py
+1
-1
未找到文件。
paddle/fluid/API.spec
浏览文件 @
32146857
...
...
@@ -134,7 +134,7 @@ paddle.fluid.layers.sampled_softmax_with_cross_entropy (ArgSpec(args=['logits',
paddle.fluid.layers.hsigmoid (ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name', 'path_table', 'path_code', 'is_custom', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, False, False)), ('document', '80641ee6810b1cdc3fd6e14fc89ecc9d'))
paddle.fluid.layers.beam_search (ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'is_accumulated', 'name', 'return_parent_idx'], varargs=None, keywords=None, defaults=(0, True, None, False)), ('document', 'b350b9a30a18e7efd7e1bb740eef6996'))
paddle.fluid.layers.row_conv (ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None)), ('document', '17485788fffe4e2d36dc58c2ac8d174e'))
paddle.fluid.layers.multiplex (ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None), ('document', '
013795af319e2e86d3506741941078ee
'))
paddle.fluid.layers.multiplex (ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None), ('document', '
2c4d1ae83da6ed35e3b36ba1b3b51d23
'))
paddle.fluid.layers.layer_norm (ArgSpec(args=['input', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None, None)), ('document', 'de6a906950bae9f3c245cb744d22b94e'))
paddle.fluid.layers.group_norm (ArgSpec(args=['input', 'groups', 'epsilon', 'param_attr', 'bias_attr', 'act', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(1e-05, None, None, None, 'NCHW', None)), ('document', '419c3a24a83cc89219a029cf4092788b'))
paddle.fluid.layers.spectral_norm (ArgSpec(args=['weight', 'dim', 'power_iters', 'eps', 'name'], varargs=None, keywords=None, defaults=(0, 1, 1e-12, None)), ('document', '3f536aafba30d793287b52d231baff1b'))
...
...
paddle/fluid/framework/data_layout_transform.cc
浏览文件 @
32146857
...
...
@@ -134,6 +134,11 @@ void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
out_layout
=
out_layout
==
DataLayout
::
kAnyLayout
?
DataLayout
::
kNCHW
:
out_layout
;
auto
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
*
dev_ctx
=
dynamic_cast
<
platform
::
MKLDNNDeviceContext
*>
(
pool
.
Get
(
expected_kernel_type
.
place_
));
auto
&
cpu_engine
=
dev_ctx
->
GetEngine
();
std
::
vector
<
int
>
in_tz
=
paddle
::
framework
::
vectorize2int
(
in
.
dims
());
std
::
vector
<
int
>
out_tz
=
in_tz
;
...
...
@@ -142,25 +147,29 @@ void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
"Input tensor type is not supported: %s"
,
in
.
type
());
memory
::
data_type
out_type
=
in_type
;
auto
in_format
=
platform
::
MKLDNNFormatForSize
(
in_tz
.
size
(),
in
.
format
());
auto
out_format
=
platform
::
MKLDNNFormatForSize
(
in_tz
.
size
(),
ToMKLDNNFormat
(
out_layout
));
// output tensor has the same dims as input. Reorder don't change dims
out
->
Resize
(
in
.
dims
());
// tempory mem pd fr out , to make reorder
auto
out_mem_pd
=
paddle
::
platform
::
create_prim_desc_from_dims
(
paddle
::
framework
::
vectorize2int
(
out
->
dims
()),
mkldnn
::
memory
::
format
::
blocked
,
out_type
);
if
(
in
.
get_mkldnn_prim_desc
()
!=
out_mem_pd
)
{
if
(
in_format
!=
out_format
)
{
void
*
in_data
=
GetDataFromTensor
(
in
,
in_type
);
auto
out_data
=
out
->
mutable_data
(
expected_kernel_type
.
place_
,
in
.
type
());
auto
in_memory
=
memory
(
in
.
get_mkldnn_prim_desc
(),
in_data
);
auto
out_memory
=
memory
(
out_mem_pd
,
out_data
);
auto
in_memory
=
memory
({{{
in_tz
},
in_type
,
in_format
},
cpu_engine
},
in_data
);
auto
out_memory
=
memory
({{{
out_tz
},
out_type
,
out_format
},
cpu_engine
},
out_data
);
platform
::
Reorder
(
in_memory
,
out_memory
);
}
else
{
out
->
ShareDataWith
(
in
);
}
out
->
set_layout
(
out_layout
);
// reset format since the out tensor will be feed to non-MKLDNN OPkernel
out
->
set_format
(
memory
::
format
::
format_undef
);
#endif
}
...
...
paddle/fluid/framework/data_transform.cc
浏览文件 @
32146857
...
...
@@ -51,31 +51,13 @@ void TransformData(const OpKernelType &expected_kernel_type,
#ifdef PADDLE_WITH_MKLDNN
// Case1 - transform from Non-MKLDNN OPKernel to MKLDNN OPKernel
// Just set layout/format. No real transform occur
out
.
ShareDataWith
(
input_tensor
);
// TODO(jczaja): Remove that once all mkldnn ops
// are modified to work with mkldnn_blocked
auto
mkldnn_fmt
=
[
&
](
int
rank
)
{
switch
(
rank
)
{
case
5
:
return
mkldnn
::
memory
::
format
::
ncdhw
;
case
4
:
return
mkldnn
::
memory
::
format
::
nchw
;
case
3
:
return
mkldnn
::
memory
::
format
::
ncw
;
case
2
:
return
mkldnn
::
memory
::
format
::
nc
;
case
1
:
return
mkldnn
::
memory
::
format
::
x
;
default:
return
mkldnn
::
memory
::
format
::
blocked
;
}
};
auto
out_mem_pd
=
paddle
::
platform
::
create_prim_desc_from_dims
(
paddle
::
framework
::
vectorize2int
(
out
.
dims
()),
mkldnn_fmt
(
out
.
dims
().
size
()));
auto
out_format
=
platform
::
MKLDNNFormatForSize
(
in
.
dims
().
size
(),
ToMKLDNNFormat
(
lin
));
out
.
set_mkldnn_prim_desc
(
out_mem_pd
);
out
.
ShareDataWith
(
input_tensor
);
out
.
set_layout
(
DataLayout
::
kMKLDNN
);
out
.
set_format
(
out_format
);
#endif
}
else
{
// Case2 - transfrom from MKLDNN OPKernel to Non-MKLDNN OPKernel
...
...
paddle/fluid/framework/details/CMakeLists.txt
浏览文件 @
32146857
...
...
@@ -10,7 +10,10 @@ cc_library(fetch_barrier_op_handle SRCS fetch_barrier_op_handle.cc DEPS framewor
cc_library
(
multi_devices_helper SRCS multi_devices_helper.cc DEPS graph graph_helper
)
cc_library
(
multi_devices_graph_print_pass SRCS multi_devices_graph_print_pass.cc DEPS multi_devices_helper
)
cc_library
(
multi_devices_graph_check_pass SRCS multi_devices_graph_check_pass.cc DEPS multi_devices_helper
)
cc_library
(
alloc_continuous_space_for_grad_pass SRCS alloc_continuous_space_for_grad_pass.cc DEPS graph graph_helper
)
cc_library
(
fuse_adam_op_pass SRCS fuse_adam_op_pass.cc fuse_optimizer_op_pass.cc DEPS graph graph_helper
)
cc_library
(
fuse_sgd_op_pass SRCS fuse_sgd_op_pass.cc fuse_optimizer_op_pass.cc DEPS graph graph_helper
)
cc_library
(
variable_visitor SRCS variable_visitor.cc DEPS lod_tensor selected_rows
)
...
...
@@ -105,4 +108,6 @@ cc_library(build_strategy SRCS build_strategy.cc DEPS
multi_devices_graph_print_pass multi_devices_graph_check_pass
fuse_elewise_add_act_pass multi_batch_merge_pass
fuse_relu_depthwise_conv_pass
memory_optimize_pass lock_free_optimize_pass alloc_continuous_space_for_grad_pass fuse_all_reduce_op_pass
)
memory_optimize_pass lock_free_optimize_pass
alloc_continuous_space_for_grad_pass fuse_all_reduce_op_pass
fuse_adam_op_pass fuse_sgd_op_pass
)
paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.cc
浏览文件 @
32146857
...
...
@@ -21,6 +21,7 @@
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/op_registry.h"
DEFINE_uint32
(
fuse_parameter_memory_size
,
0
,
// 0 KB
"fuse_parameter_memory_size is up limited memory size "
"of one group parameters' gradient which is the input "
...
...
@@ -105,20 +106,29 @@ class AllocContinuousSpaceForGradPass : public ir::Pass {
auto
ele_dtype
=
iter
->
second
->
Var
()
->
GetDataType
();
if
(
dtype
==
kDefaultDtype
)
{
dtype
=
ele_dtype
;
PADDLE_ENFORCE_NE
(
ele_dtype
,
kDefaultDtype
);
PADDLE_ENFORCE_NE
(
ele_dtype
,
kDefaultDtype
,
"The data type should not be bool."
);
}
PADDLE_ENFORCE_EQ
(
ele_dtype
,
dtype
);
PADDLE_ENFORCE_EQ
(
ele_dtype
,
dtype
,
"The data type of input is not consistent."
);
}
// Create the fused variable name.
// Create a FusedVarsSet to avoid duplicating names for fused_var in other
// pass.
if
(
!
result
.
Has
(
kFusedVars
))
{
result
.
Set
(
kFusedVars
,
new
FusedVars
);
}
const
std
::
string
prefix
(
kFusedVarNamePrefix
);
// The fused_var_name should be unique.
auto
fused_var_name
=
prefix
+
"GRAD@"
+
params_grads
[
0
].
second
;
// the kFusedGrads is used be fuse_optimizer_op_pass.
result
.
Set
(
kFusedGrads
,
new
FusedGrads
);
// the fused_var_name should be unique, so it appends
// params_grads.begin()->second.
auto
fused_var_name
=
std
::
string
(
kFusedVarNamePrefix
)
+
"@GRAD@"
+
params_grads
.
begin
()
->
second
;
result
.
Get
<
FusedGrads
>
(
kFusedGrads
)
=
fused_var_name
;
auto
&
fused_var_set
=
result
.
Get
<
FusedVars
>
(
kFusedVars
);
PADDLE_ENFORCE_EQ
(
fused_var_set
.
count
(
fused_var_name
),
0
);
PADDLE_ENFORCE_EQ
(
fused_var_set
.
count
(
fused_var_name
),
0
,
"%s is duplicate in FusedVars."
,
fused_var_name
);
fused_var_set
.
insert
(
fused_var_name
);
InitFusedVarsAndAllocSpaceForVars
(
places
,
local_scopes
,
vars
,
...
...
@@ -295,17 +305,6 @@ class AllocContinuousSpaceForGradPass : public ir::Pass {
return
type
==
proto
::
VarType
::
LOD_TENSOR
;
}
void
AppendAllocSpaceForVarsOp
(
const
std
::
vector
<
std
::
string
>
&
params_name
,
const
std
::
vector
<
std
::
string
>
&
grads_name
,
const
std
::
string
&
fused_var_name
,
BlockDesc
*
global_block
)
const
{
auto
op_desc
=
global_block
->
AppendOp
();
op_desc
->
SetType
(
"alloc_continuous_space"
);
op_desc
->
SetInput
(
"Input"
,
params_name
);
op_desc
->
SetOutput
(
"Output"
,
grads_name
);
op_desc
->
SetOutput
(
"FusedOutput"
,
{
fused_var_name
});
}
void
RecordParamsAndGrads
(
ir
::
Node
*
node
,
ParamsAndGrads
*
params_grads
)
const
{
try
{
...
...
@@ -358,6 +357,7 @@ class AllocContinuousSpaceForGradPass : public ir::Pass {
}
}
// Alloc continuous space for vars.
std
::
vector
<
std
::
string
>
grads_name
;
std
::
vector
<
std
::
string
>
params_name
;
grads_name
.
reserve
(
params_grads
.
size
());
...
...
@@ -370,7 +370,6 @@ class AllocContinuousSpaceForGradPass : public ir::Pass {
AppendAllocSpaceForVarsOp
(
params_name
,
grads_name
,
fused_var_name
,
program_desc
.
MutableBlock
(
0
));
// Run Only Once Programs
for
(
size_t
i
=
0
;
i
<
local_scopes
.
size
();
++
i
)
{
for
(
auto
&
op_desc
:
program_desc
.
Block
(
0
).
AllOps
())
{
auto
op
=
OpRegistry
::
CreateOp
(
*
op_desc
);
...
...
@@ -378,6 +377,17 @@ class AllocContinuousSpaceForGradPass : public ir::Pass {
}
}
}
void
AppendAllocSpaceForVarsOp
(
const
std
::
vector
<
std
::
string
>
&
params_name
,
const
std
::
vector
<
std
::
string
>
&
grads_name
,
const
std
::
string
&
fused_var_name
,
BlockDesc
*
global_block
)
const
{
auto
op_desc
=
global_block
->
AppendOp
();
op_desc
->
SetType
(
"alloc_continuous_space"
);
op_desc
->
SetInput
(
"Input"
,
params_name
);
op_desc
->
SetOutput
(
"Output"
,
grads_name
);
op_desc
->
SetOutput
(
"FusedOutput"
,
{
fused_var_name
});
}
};
}
// namespace details
...
...
paddle/fluid/framework/details/broadcast_op_handle.cc
浏览文件 @
32146857
...
...
@@ -27,20 +27,17 @@ void BroadcastOpHandle::RunImpl() {
if
(
places_
.
size
()
==
1
)
return
;
// The input and output may have dummy vars.
VarHandle
*
in_var_handle
;
{
auto
in_var_handles
=
DynamicCast
<
VarHandle
>
(
inputs_
);
PADDLE_ENFORCE_EQ
(
in_var_handles
.
size
(),
1UL
,
"The number of input should be one."
);
in_var_handle
=
in_var_handles
[
0
];
}
auto
out_var_handles
=
DynamicCast
<
VarHandle
>
(
outputs_
);
PADDLE_ENFORCE_EQ
(
in_var_handles
.
size
(),
1UL
,
"The number of input should be one."
);
PADDLE_ENFORCE_EQ
(
out_var_handles
.
size
(),
places_
.
size
(),
"The number of output should equal to the number of places."
);
VarHandle
*
in_var_handle
=
in_var_handles
[
0
];
WaitInputVarGenerated
();
std
::
vector
<
const
Scope
*>
var_scopes
;
...
...
paddle/fluid/framework/details/build_strategy.cc
浏览文件 @
32146857
...
...
@@ -17,7 +17,6 @@ limitations under the License. */
#include <glog/logging.h>
#include <memory>
#include <utility>
#include "paddle/fluid/framework/details/memory_optimize_helper.h"
#include "paddle/fluid/framework/details/multi_devices_graph_pass.h"
#include "paddle/fluid/framework/details/multi_devices_graph_print_pass.h"
...
...
@@ -82,23 +81,43 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
AppendPass
(
"inplace_pass"
);
}
if
(
strategy
.
fuse_elewise_add_act_ops_
)
{
if
(
strategy
_
.
fuse_elewise_add_act_ops_
)
{
VLOG
(
10
)
<<
"Add fuse_elewise_add_act_pass"
;
AppendPass
(
"fuse_elewise_add_act_pass"
);
}
// for single card training, fuse_all_reduce_ops is unnecessary.
// alloc_continuous_space_for_grad_pass should be before of MultiDevPass.
if
(
strategy
.
fuse_all_reduce_ops_
)
{
if
(
strategy_
.
fuse_all_reduce_ops_
)
{
VLOG
(
10
)
<<
"Add alloc_continuous_space_for_grad_pass"
;
AppendPass
(
"alloc_continuous_space_for_grad_pass"
);
}
if
(
strategy_
.
fuse_all_optimizer_ops_
)
{
if
(
strategy_
.
reduce_
==
BuildStrategy
::
ReduceStrategy
::
kReduce
||
strategy_
.
is_distribution_
)
{
VLOG
(
3
)
<<
"Currently, fuse_all_optimizer_ops only works under AllReduce "
"mode."
;
strategy_
.
fuse_all_optimizer_ops_
=
false
;
}
else
{
VLOG
(
10
)
<<
"Add alloc_continuous_space_for_grad_pass"
;
AppendPass
(
"alloc_continuous_space_for_grad_pass"
);
// NOTE: fuse_all_xx_ops will count the number of xx operator first,
// if the number is zero, fuse_all_reduce_ops will do nothing.
// Currently, only one type of optimization algorithm can be fused.
VLOG
(
10
)
<<
"Add fuse_adam_op_pass"
;
AppendPass
(
"fuse_adam_op_pass"
);
VLOG
(
10
)
<<
"Add fuse_sgd_op_pass"
;
AppendPass
(
"fuse_sgd_op_pass"
);
}
}
// Add a graph viz pass to record a graph.
if
(
!
strategy
.
debug_graphviz_path_
.
empty
())
{
auto
viz_pass
=
AppendPass
(
"graph_viz_pass"
);
const
std
::
string
graph_path
=
string
::
Sprintf
(
"%s%s"
,
strategy
.
debug_graphviz_path_
.
c_str
(),
"_fused_graph"
);
"%s%s"
,
strategy
_
.
debug_graphviz_path_
.
c_str
(),
"_fused_graph"
);
viz_pass
->
Set
<
std
::
string
>
(
"graph_viz_path"
,
new
std
::
string
(
graph_path
));
}
...
...
@@ -118,14 +137,14 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
// the de-fact IR, any reuse on Graph is meaningless.
// A side-effect of that, memory optimize cannot forsee the fetched vars
// , so fetchlist should be set persistable before call the Run interface.
if
(
strategy
.
memory_optimize_
)
{
if
(
strategy
_
.
memory_optimize_
)
{
VLOG
(
10
)
<<
"Add memory_optimize_pass"
;
AppendPass
(
"memory_optimize_pass"
);
}
AppendMultiDevPass
(
strategy
);
AppendMultiDevPass
(
strategy
_
);
if
(
strategy
.
fuse_all_reduce_ops_
)
{
if
(
strategy
_
.
fuse_all_reduce_ops_
)
{
// NOTE: fuse_all_reduce_ops will count the number of all_reduce operator
// first, if the number is zero, fuse_all_reduce_ops will do nothing.
VLOG
(
10
)
<<
"Add fuse_all_reduce_op_pass"
;
...
...
@@ -151,7 +170,7 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
AppendPass
(
"all_reduce_deps_pass"
);
}
if
(
SeqOnlyAllReduceOps
(
strategy
))
{
if
(
SeqOnlyAllReduceOps
(
strategy
_
))
{
VLOG
(
10
)
<<
"Add all_reduce_deps_pass"
;
AppendPass
(
"all_reduce_deps_pass"
);
}
...
...
@@ -165,7 +184,7 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
// Convert graph to run on multi-devices.
void
AppendMultiDevPass
(
const
BuildStrategy
&
strategy
)
{
ir
::
Pass
*
multi_devices_pass
=
nullptr
;
if
(
strategy
_
.
is_distribution_
)
{
if
(
strategy
.
is_distribution_
)
{
VLOG
(
10
)
<<
"Add dist_multi_devices_pass"
;
multi_devices_pass
=
AppendPass
(
"dist_multi_devices_pass"
).
get
();
}
else
{
...
...
@@ -235,17 +254,22 @@ ir::Graph *BuildStrategy::Apply(ir::Graph *graph,
pass
->
Erase
(
kNCCLCtxs
);
pass
->
SetNotOwned
<
platform
::
NCCLContextMap
>
(
kNCCLCtxs
,
nctx
);
#endif
}
else
if
(
pass
->
Type
()
==
"fuse_all_reduce_op_pass"
)
{
}
else
if
(
pass
->
Type
()
==
"alloc_continuous_space_for_grad_pass"
||
pass
->
Type
()
==
"fuse_adam_op_pass"
||
pass
->
Type
()
==
"fuse_sgd_op_pass"
||
pass
->
Type
()
==
"fuse_all_reduce_op_pass"
)
{
pass
->
Erase
(
kPlaces
);
pass
->
SetNotOwned
<
const
std
::
vector
<
platform
::
Place
>>
(
kPlaces
,
&
places
);
pass
->
Erase
(
kLocalScopes
);
pass
->
SetNotOwned
<
const
std
::
vector
<
Scope
*>>
(
kLocalScopes
,
&
local_scopes
);
if
(
pass
->
Type
()
==
"fuse_all_reduce_op_pass"
)
{
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
platform
::
NCCLContextMap
*
nctx
=
use_cuda
?
nccl_ctxs
:
nullptr
;
pass
->
Erase
(
kNCCLCtxs
);
pass
->
SetNotOwned
<
platform
::
NCCLContextMap
>
(
kNCCLCtxs
,
nctx
);
#endif
}
}
else
if
(
pass
->
Type
()
==
"alloc_continuous_space_for_grad_pass"
)
{
pass
->
Erase
(
kPlaces
);
pass
->
SetNotOwned
<
const
std
::
vector
<
platform
::
Place
>>
(
kPlaces
,
&
places
);
...
...
@@ -294,4 +318,6 @@ USE_PASS(inplace_pass);
USE_PASS
(
lock_free_optimize_pass
);
USE_PASS
(
alloc_continuous_space_for_grad_pass
);
USE_PASS
(
graph_to_program_pass
);
USE_PASS
(
fuse_adam_op_pass
);
USE_PASS
(
fuse_sgd_op_pass
);
USE_PASS
(
fuse_all_reduce_op_pass
);
paddle/fluid/framework/details/build_strategy.h
浏览文件 @
32146857
...
...
@@ -18,7 +18,6 @@
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/ir/pass_builder.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
...
...
@@ -76,6 +75,8 @@ struct BuildStrategy {
bool
fuse_elewise_add_act_ops_
{
false
};
bool
fuse_all_optimizer_ops_
{
false
};
bool
fuse_all_reduce_ops_
{
false
};
bool
fuse_relu_depthwise_conv_
{
false
};
...
...
paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc
浏览文件 @
32146857
...
...
@@ -31,9 +31,10 @@ FastThreadedSSAGraphExecutor::FastThreadedSSAGraphExecutor(
local_scopes_
(
local_scopes
),
places_
(
places
),
graph_
(
graph
),
fetch_ctxs_
(
places
),
pool_
(
strategy
.
num_threads_
),
prepare_pool_
(
1
),
// add one more thread for generate op_deps
fetch_ctxs_
(
places
)
{
// add one more thread for generate op_deps
prepare_pool_
(
1
)
{
for
(
auto
&
op
:
ir
::
FilterByNodeWrapper
<
OpHandleBase
>
(
*
graph_
))
{
int
dep
=
static_cast
<
int
>
(
op
->
NotReadyInputSize
());
op_deps_
.
emplace
(
op
,
dep
);
...
...
paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h
浏览文件 @
32146857
...
...
@@ -14,7 +14,9 @@
#pragma once
#include <ThreadPool.h>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/blocking_queue.h"
#include "paddle/fluid/framework/details/exception_holder.h"
...
...
@@ -37,6 +39,8 @@ class FastThreadedSSAGraphExecutor : public SSAGraphExecutor {
const
ir
::
Graph
&
Graph
()
const
override
;
private:
// Note(zcd): the ThreadPool should be placed last so that ThreadPool should
// be destroyed first.
ExecutionStrategy
strategy_
;
std
::
vector
<
Scope
*>
local_scopes_
;
std
::
vector
<
platform
::
Place
>
places_
;
...
...
@@ -45,21 +49,22 @@ class FastThreadedSSAGraphExecutor : public SSAGraphExecutor {
std
::
unordered_map
<
OpHandleBase
*
,
int
>
op_deps_
;
std
::
vector
<
OpHandleBase
*>
bootstrap_ops_
;
::
ThreadPool
pool_
;
::
ThreadPool
prepare_pool_
;
platform
::
DeviceContextPool
fetch_ctxs_
;
std
::
atomic
<
int
>
remaining_
;
std
::
future
<
std
::
unique_ptr
<
std
::
unordered_map
<
OpHandleBase
*
,
std
::
atomic
<
int
>>>>
atomic_op_deps_
;
ExceptionHolder
exception_
;
::
ThreadPool
pool_
;
::
ThreadPool
prepare_pool_
;
void
RunOpAsync
(
std
::
unordered_map
<
OpHandleBase
*
,
std
::
atomic
<
int
>>
*
op_deps
,
OpHandleBase
*
op
,
const
std
::
shared_ptr
<
BlockingQueue
<
size_t
>>
&
complete_q
);
void
PrepareAtomicOpDeps
();
std
::
future
<
std
::
unique_ptr
<
std
::
unordered_map
<
OpHandleBase
*
,
std
::
atomic
<
int
>>>>
atomic_op_deps_
;
ExceptionHolder
exception_
;
};
}
// namespace details
}
// namespace framework
...
...
paddle/fluid/framework/details/fuse_adam_op_pass.cc
0 → 100644
浏览文件 @
32146857
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/details/fuse_adam_op_pass.h"
#include <algorithm>
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/op_registry.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
const
std
::
string
FuseAdamOpPass
::
GetOpType
()
const
{
return
"adam"
;
}
const
std
::
vector
<
std
::
string
>
FuseAdamOpPass
::
GetAuxiliaryVarNames
()
const
{
return
{
"Param"
,
"Moment1"
,
"Moment2"
,
"Beta1Pow"
,
"Beta2Pow"
};
}
void
FuseAdamOpPass
::
FuseOptimizerOps
(
const
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
&
aux_var_set
,
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>
&
fused_vars_name
,
const
std
::
vector
<
ir
::
Node
*>
&
adam_ops
,
ir
::
Graph
*
graph
)
const
{
FuseAdamOps
(
aux_var_set
,
fused_vars_name
,
adam_ops
,
graph
);
FuseScaleOps
(
aux_var_set
.
at
(
"Beta1Pow"
),
fused_vars_name
.
at
(
"Beta1Pow"
),
adam_ops
,
graph
);
FuseScaleOps
(
aux_var_set
.
at
(
"Beta2Pow"
),
fused_vars_name
.
at
(
"Beta2Pow"
),
adam_ops
,
graph
);
}
void
FuseAdamOpPass
::
FuseAdamOps
(
const
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
&
vars_set
,
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>
&
fused_vars_name
,
const
std
::
vector
<
ir
::
Node
*>
&
adam_ops
,
ir
::
Graph
*
graph
)
const
{
PADDLE_ENFORCE_GT
(
adam_ops
.
size
(),
static_cast
<
size_t
>
(
0
));
// Check attributions
// NOTE: If new attribution is added, the following code maybe need change.
int
op_role
=
boost
::
get
<
int
>
(
adam_ops
[
0
]
->
Op
()
->
GetAttr
(
OpProtoAndCheckerMaker
::
OpRoleAttrName
()));
float
beta1
=
boost
::
get
<
float
>
(
adam_ops
[
0
]
->
Op
()
->
GetAttr
(
"beta1"
));
float
beta2
=
boost
::
get
<
float
>
(
adam_ops
[
0
]
->
Op
()
->
GetAttr
(
"beta2"
));
float
epsilon
=
boost
::
get
<
float
>
(
adam_ops
[
0
]
->
Op
()
->
GetAttr
(
"epsilon"
));
bool
lazy_mode
=
boost
::
get
<
bool
>
(
adam_ops
[
0
]
->
Op
()
->
GetAttr
(
"lazy_mode"
));
int64_t
min_row_size_to_use_multithread
=
boost
::
get
<
int64_t
>
(
adam_ops
[
0
]
->
Op
()
->
GetAttr
(
"min_row_size_to_use_multithread"
));
for
(
auto
&
adam_op
:
adam_ops
)
{
PADDLE_ENFORCE_EQ
(
beta1
,
boost
::
get
<
float
>
(
adam_op
->
Op
()
->
GetAttr
(
"beta1"
)));
PADDLE_ENFORCE_EQ
(
beta2
,
boost
::
get
<
float
>
(
adam_op
->
Op
()
->
GetAttr
(
"beta2"
)));
PADDLE_ENFORCE_EQ
(
epsilon
,
boost
::
get
<
float
>
(
adam_op
->
Op
()
->
GetAttr
(
"epsilon"
)));
PADDLE_ENFORCE_EQ
(
lazy_mode
,
boost
::
get
<
bool
>
(
adam_op
->
Op
()
->
GetAttr
(
"lazy_mode"
)));
PADDLE_ENFORCE_EQ
(
min_row_size_to_use_multithread
,
boost
::
get
<
int64_t
>
(
adam_op
->
Op
()
->
GetAttr
(
"min_row_size_to_use_multithread"
)));
PADDLE_ENFORCE_EQ
(
op_role
,
boost
::
get
<
int
>
(
adam_op
->
Op
()
->
GetAttr
(
OpProtoAndCheckerMaker
::
OpRoleAttrName
())));
}
// NOTE: fused_var is only exist in scope, so the graph doesn't have fused_var
// node.
VLOG
(
10
)
<<
"Insert adam to graph "
;
OpDesc
adam_desc
(
adam_ops
[
0
]
->
Op
()
->
Block
());
adam_desc
.
SetType
(
"adam"
);
adam_desc
.
SetInput
(
"Param"
,
{
fused_vars_name
.
at
(
"Param"
)});
adam_desc
.
SetInput
(
"Grad"
,
{
fused_vars_name
.
at
(
"Grad"
)});
adam_desc
.
SetInput
(
"Moment1"
,
{
fused_vars_name
.
at
(
"Moment1"
)});
adam_desc
.
SetInput
(
"Moment2"
,
{
fused_vars_name
.
at
(
"Moment2"
)});
// TODO(zcd): The LearningRate, Beta1Pow, Beta2Pow should be equal.
adam_desc
.
SetInput
(
"LearningRate"
,
adam_ops
[
0
]
->
Op
()
->
Input
(
"LearningRate"
));
adam_desc
.
SetInput
(
"Beta1Pow"
,
adam_ops
[
0
]
->
Op
()
->
Input
(
"Beta1Pow"
));
adam_desc
.
SetInput
(
"Beta2Pow"
,
adam_ops
[
0
]
->
Op
()
->
Input
(
"Beta2Pow"
));
adam_desc
.
SetOutput
(
"ParamOut"
,
{
fused_vars_name
.
at
(
"Param"
)});
adam_desc
.
SetOutput
(
"Moment1Out"
,
{
fused_vars_name
.
at
(
"Moment1"
)});
adam_desc
.
SetOutput
(
"Moment2Out"
,
{
fused_vars_name
.
at
(
"Moment2"
)});
adam_desc
.
SetAttr
(
"beta1"
,
beta1
);
adam_desc
.
SetAttr
(
"beta2"
,
beta2
);
adam_desc
.
SetAttr
(
"epsilon"
,
epsilon
);
adam_desc
.
SetAttr
(
"lazy_mode"
,
lazy_mode
);
adam_desc
.
SetAttr
(
"min_row_size_to_use_multithread"
,
min_row_size_to_use_multithread
);
adam_desc
.
SetAttr
(
OpProtoAndCheckerMaker
::
OpRoleAttrName
(),
op_role
);
auto
adam_node
=
graph
->
CreateOpNode
(
&
adam_desc
);
InserInputAndOutputForOptOps
(
adam_ops
,
adam_node
);
}
void
FuseAdamOpPass
::
FuseScaleOps
(
const
std
::
vector
<
std
::
string
>
&
beta_name
,
const
std
::
string
&
fused_var_name
,
const
std
::
vector
<
ir
::
Node
*>
&
adam_ops
,
ir
::
Graph
*
graph
)
const
{
PADDLE_ENFORCE_EQ
(
beta_name
.
size
(),
adam_ops
.
size
());
const
std
::
string
scale_op_name
=
"scale"
;
// Get the scale_ops of dealing the adam's beta var.
std
::
vector
<
ir
::
Node
*>
scale_ops
;
scale_ops
.
reserve
(
beta_name
.
size
());
for
(
size_t
i
=
0
;
i
<
adam_ops
.
size
();
++
i
)
{
auto
&
beta_1_pow_name
=
beta_name
[
i
];
auto
beta_pow_iter
=
std
::
find_if
(
adam_ops
[
i
]
->
inputs
.
begin
(),
adam_ops
[
i
]
->
inputs
.
end
(),
[
&
beta_name
,
&
beta_1_pow_name
](
ir
::
Node
*
var_node
)
->
bool
{
return
var_node
->
Var
()
&&
var_node
->
Var
()
->
Name
()
==
beta_1_pow_name
;
});
PADDLE_ENFORCE
(
beta_pow_iter
!=
adam_ops
[
i
]
->
inputs
.
end
());
auto
beta_pow_node
=
*
beta_pow_iter
;
auto
scale_op_iter
=
std
::
find_if
(
beta_pow_node
->
outputs
.
begin
(),
beta_pow_node
->
outputs
.
end
(),
[
&
scale_op_name
](
ir
::
Node
*
op_node
)
->
bool
{
return
op_node
->
Op
()
&&
op_node
->
Op
()
->
Type
()
==
scale_op_name
;
});
PADDLE_ENFORCE
(
scale_op_iter
!=
beta_pow_node
->
outputs
.
end
());
scale_ops
.
emplace_back
(
*
scale_op_iter
);
}
PADDLE_ENFORCE_EQ
(
scale_ops
.
size
(),
beta_name
.
size
());
// Check attributions
// NOTE: If new attribution is added, the following code maybe need change.
int
op_role
=
boost
::
get
<
int
>
(
scale_ops
[
0
]
->
Op
()
->
GetAttr
(
OpProtoAndCheckerMaker
::
OpRoleAttrName
()));
float
scale
=
boost
::
get
<
float
>
(
scale_ops
[
0
]
->
Op
()
->
GetAttr
(
"scale"
));
float
bias
=
boost
::
get
<
float
>
(
scale_ops
[
0
]
->
Op
()
->
GetAttr
(
"bias"
));
bool
bias_after_scale
=
boost
::
get
<
bool
>
(
scale_ops
[
0
]
->
Op
()
->
GetAttr
(
"bias_after_scale"
));
for
(
auto
&
scale_op
:
scale_ops
)
{
PADDLE_ENFORCE_EQ
(
scale
,
boost
::
get
<
float
>
(
scale_op
->
Op
()
->
GetAttr
(
"scale"
)));
PADDLE_ENFORCE_EQ
(
bias
,
boost
::
get
<
float
>
(
scale_op
->
Op
()
->
GetAttr
(
"bias"
)));
PADDLE_ENFORCE_EQ
(
bias_after_scale
,
boost
::
get
<
bool
>
(
scale_op
->
Op
()
->
GetAttr
(
"bias_after_scale"
)));
PADDLE_ENFORCE_EQ
(
op_role
,
boost
::
get
<
int
>
(
scale_op
->
Op
()
->
GetAttr
(
OpProtoAndCheckerMaker
::
OpRoleAttrName
())));
}
// NOTE: fused_var is only exist in scope, so the graph doesn't have fused_var
// node.
VLOG
(
10
)
<<
"Insert fused scale to graph."
;
OpDesc
scale_desc
(
scale_ops
[
0
]
->
Op
()
->
Block
());
scale_desc
.
SetType
(
"scale"
);
scale_desc
.
SetInput
(
"X"
,
{
fused_var_name
});
scale_desc
.
SetOutput
(
"Out"
,
{
fused_var_name
});
scale_desc
.
SetAttr
(
"scale"
,
scale
);
scale_desc
.
SetAttr
(
"bias"
,
bias
);
scale_desc
.
SetAttr
(
"bias_after_scale"
,
bias_after_scale
);
scale_desc
.
SetAttr
(
OpProtoAndCheckerMaker
::
OpRoleAttrName
(),
op_role
);
auto
scale_node
=
graph
->
CreateOpNode
(
&
scale_desc
);
for
(
auto
scale_op
:
scale_ops
)
{
// set inputs
scale_node
->
inputs
.
insert
(
scale_node
->
inputs
.
begin
(),
scale_op
->
inputs
.
begin
(),
scale_op
->
inputs
.
end
());
for
(
auto
&
input
:
scale_op
->
inputs
)
{
std
::
replace
(
input
->
outputs
.
begin
(),
input
->
outputs
.
end
(),
scale_op
,
scale_node
);
}
// set outputs
scale_node
->
outputs
.
insert
(
scale_node
->
outputs
.
begin
(),
scale_op
->
outputs
.
begin
(),
scale_op
->
outputs
.
end
());
for
(
auto
&
output
:
scale_op
->
outputs
)
{
std
::
replace
(
output
->
inputs
.
begin
(),
output
->
inputs
.
end
(),
scale_op
,
scale_node
);
}
}
// Delete scale_ops
for
(
auto
&
scale_op
:
scale_ops
)
{
graph
->
RemoveNode
(
scale_op
);
}
}
}
// namespace details
}
// namespace framework
}
// namespace paddle
REGISTER_PASS
(
fuse_adam_op_pass
,
paddle
::
framework
::
details
::
FuseAdamOpPass
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kPlaces
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kLocalScopes
);
paddle/fluid/framework/details/fuse_adam_op_pass.h
0 → 100644
浏览文件 @
32146857
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/details/build_strategy.h"
#include "paddle/fluid/framework/details/fuse_optimizer_op_pass.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/ir/graph.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
class
FuseAdamOpPass
:
public
FuseOptimizerOpPass
{
private:
virtual
const
std
::
string
GetOpType
()
const
;
virtual
const
std
::
vector
<
std
::
string
>
GetAuxiliaryVarNames
()
const
;
// Fuse Adam Ops and Scale Ops which are used to update "Beta1Pow", "Beta2Pow"
virtual
void
FuseOptimizerOps
(
const
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
&
vars_set
,
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>
&
fused_vars_name
,
const
std
::
vector
<
ir
::
Node
*>
&
adam_ops
,
ir
::
Graph
*
graph
)
const
;
void
FuseAdamOps
(
const
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
&
vars_set
,
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>
&
fused_vars_name
,
const
std
::
vector
<
ir
::
Node
*>
&
adam_ops
,
ir
::
Graph
*
graph
)
const
;
void
FuseScaleOps
(
const
std
::
vector
<
std
::
string
>
&
aux_var_set
,
const
std
::
string
&
fused_var_name
,
const
std
::
vector
<
ir
::
Node
*>
&
adam_ops
,
ir
::
Graph
*
graph
)
const
;
};
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/fuse_optimizer_op_pass.cc
0 → 100644
浏览文件 @
32146857
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/details/fuse_optimizer_op_pass.h"
#include <algorithm>
#include <unordered_set>
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/op_registry.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
void
FuseOptimizerOpPass
::
ApplyImpl
(
ir
::
Graph
*
graph
)
const
{
ir
::
Graph
&
result
=
*
graph
;
auto
&
places
=
Get
<
const
std
::
vector
<
platform
::
Place
>>
(
kPlaces
);
auto
&
local_scopes
=
Get
<
const
std
::
vector
<
Scope
*>>
(
kLocalScopes
);
const
std
::
string
fuse_op_type
=
GetOpType
();
const
std
::
vector
<
std
::
string
>
aux_var_names
=
GetAuxiliaryVarNames
();
// Step 1: Get the specified op and auxiliary variables.
std
::
vector
<
ir
::
Node
*>
topo_nodes
=
ir
::
TopologySortOperations
(
result
);
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
aux_var_set
;
std
::
vector
<
ir
::
Node
*>
opt_ops
;
for
(
auto
&
node
:
topo_nodes
)
{
GetSpecifiedOpsAndVars
(
fuse_op_type
,
aux_var_names
,
node
,
&
opt_ops
,
&
aux_var_set
);
}
VLOG
(
10
)
<<
"Find "
<<
fuse_op_type
<<
" operators: "
<<
opt_ops
.
size
();
if
(
opt_ops
.
size
()
==
0
)
{
return
;
}
if
(
result
.
Has
(
kFusedOptType
))
{
VLOG
(
10
)
<<
"Currently only support fusing one type optimizer op. Has fused "
<<
result
.
Get
<
FusedOptType
>
(
kFusedOptType
);
return
;
}
else
{
result
.
Set
(
kFusedOptType
,
new
FusedOptType
);
}
result
.
Get
<
FusedOptType
>
(
kFusedOptType
)
=
fuse_op_type
;
// Step 2: Insert fused_var_name to FusedVars, and the FusedVars need be
// initialized in scopes before execution.
if
(
!
result
.
Has
(
kFusedVars
))
{
result
.
Set
(
kFusedVars
,
new
FusedVars
);
}
std
::
unordered_map
<
std
::
string
,
std
::
string
>
fused_vars_name
;
fused_vars_name
.
reserve
(
aux_var_names
.
size
()
+
1
);
auto
&
fused_var_set
=
result
.
Get
<
FusedVars
>
(
kFusedVars
);
const
std
::
string
prefix
(
kFusedVarNamePrefix
);
// NOTE: the fused_var_name should be unique.
for
(
auto
&
var_name
:
aux_var_names
)
{
auto
fused_var_name
=
prefix
+
"_"
+
fuse_op_type
+
"_"
+
var_name
+
"_"
+
aux_var_set
[
var_name
][
0
];
VLOG
(
10
)
<<
fused_var_name
;
fused_vars_name
.
emplace
(
var_name
,
fused_var_name
);
PADDLE_ENFORCE_EQ
(
fused_var_set
.
count
(
fused_var_name
),
0
);
fused_var_set
.
insert
(
fused_var_name
);
}
// Step 3: Get the fused Gradient's name
auto
&
params_grads
=
result
.
Get
<
ParamsAndGrads
>
(
kParamsAndGrads
);
if
(
!
result
.
Has
(
kFusedGrads
))
{
PADDLE_THROW
(
"The alloc_continuous_space_for_grad_pass should be called before this "
"pass."
);
}
auto
&
fused_grad
=
result
.
Get
<
FusedGrads
>
(
kFusedGrads
);
auto
&
fused_vars
=
result
.
Get
<
FusedVars
>
(
kFusedVars
);
auto
iter
=
std
::
find
(
fused_vars
.
begin
(),
fused_vars
.
end
(),
fused_grad
);
PADDLE_ENFORCE
(
iter
!=
fused_vars
.
end
(),
"Not find the fused_grad."
);
fused_vars_name
.
emplace
(
"Grad"
,
fused_grad
);
// Step 4: Sort the parameters and auxiliary variables according
// to parameters' name to make variables' name correspond correctly.
PADDLE_ENFORCE
(
result
.
Has
(
kParamsAndGrads
),
"Does't find kParamsAndGrads."
);
PADDLE_ENFORCE_EQ
(
params_grads
.
size
(),
aux_var_set
.
begin
()
->
second
.
size
(),
"The size of params_grads and aux_var_set are not equal."
);
SortParametersAndAuxVars
(
params_grads
,
&
aux_var_set
,
&
opt_ops
);
// Step 5: Alloc continuous space for Parameters and AuxiliaryVar(e.g.
// Moment1, Moment2, Beta1Pow, Beta2Pow) of all the optimizer ops separately.
InitFusedVarsAndAllocSpaceForVars
(
places
,
local_scopes
,
aux_var_names
,
aux_var_set
,
fused_vars_name
);
// Step 6: Fuse optimizer Ops and Scale Ops
FuseOptimizerOps
(
aux_var_set
,
fused_vars_name
,
opt_ops
,
&
result
);
// Step 7: Remove optimizer Ops
for
(
auto
&
opt_op
:
opt_ops
)
{
graph
->
RemoveNode
(
opt_op
);
}
}
void
FuseOptimizerOpPass
::
InitFusedVarsAndAllocSpaceForVars
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
std
::
string
>
&
aux_var_names
,
const
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
&
aux_var_set
,
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>
&
fused_vars_name
)
const
{
VLOG
(
10
)
<<
"Init FusedVars."
;
// Alloc parameters and auxiliary vars in the respective scope.
size_t
idx
=
local_scopes
.
size
();
for
(
auto
iter
=
local_scopes
.
rbegin
();
iter
!=
local_scopes
.
rend
();
++
iter
,
--
idx
)
{
auto
&
scope
=
*
iter
;
for
(
auto
&
var_name
:
aux_var_names
)
{
auto
fused_var_name
=
fused_vars_name
.
at
(
var_name
);
VLOG
(
10
)
<<
"Init "
<<
fused_var_name
;
PADDLE_ENFORCE
(
scope
->
FindVar
(
fused_var_name
)
==
nullptr
,
"%s has exist in scope[%d]"
,
fused_var_name
,
idx
);
scope
->
Var
(
fused_var_name
)
->
GetMutable
<
LoDTensor
>
();
}
}
ProgramDesc
program_desc
;
auto
*
global_block
=
program_desc
.
MutableBlock
(
0
);
for
(
auto
&
var_name
:
aux_var_names
)
{
AppendAllocContinuousSpace
(
aux_var_set
.
at
(
var_name
),
fused_vars_name
.
at
(
var_name
),
true
,
global_block
);
}
for
(
size_t
i
=
0
;
i
<
local_scopes
.
size
();
++
i
)
{
for
(
auto
&
op_desc
:
global_block
->
AllOps
())
{
auto
op
=
OpRegistry
::
CreateOp
(
*
op_desc
);
op
->
Run
(
*
local_scopes
[
i
],
places
[
i
]);
}
}
}
void
FuseOptimizerOpPass
::
SortParametersAndAuxVars
(
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
&
params_grads
,
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
*
aux_vars_set
,
std
::
vector
<
ir
::
Node
*>
*
ops
)
const
{
PADDLE_ENFORCE_NE
(
aux_vars_set
->
count
(
"Param"
),
static_cast
<
size_t
>
(
0
));
auto
&
param_vec
=
aux_vars_set
->
at
(
"Param"
);
std
::
vector
<
size_t
>
param_sort_idx
;
param_sort_idx
.
reserve
(
param_vec
.
size
());
for
(
auto
&
p_g
:
params_grads
)
{
auto
iter
=
std
::
find
(
param_vec
.
begin
(),
param_vec
.
end
(),
p_g
.
first
);
PADDLE_ENFORCE
(
iter
!=
param_vec
.
end
());
auto
idx
=
std
::
distance
(
param_vec
.
begin
(),
iter
);
param_sort_idx
.
emplace_back
(
idx
);
}
for
(
auto
&
aux_vars
:
*
aux_vars_set
)
{
std
::
vector
<
std
::
string
>
sorted_vars
;
sorted_vars
.
reserve
(
aux_vars
.
second
.
size
());
for
(
size_t
i
=
0
;
i
<
aux_vars
.
second
.
size
();
++
i
)
{
sorted_vars
.
emplace_back
(
aux_vars
.
second
.
at
(
param_sort_idx
[
i
]));
}
std
::
swap
(
aux_vars
.
second
,
sorted_vars
);
std
::
stringstream
out
;
for
(
auto
&
var_name
:
aux_vars
.
second
)
{
out
<<
var_name
<<
" "
;
}
VLOG
(
10
)
<<
aux_vars
.
first
<<
": "
<<
out
.
str
();
}
std
::
vector
<
ir
::
Node
*>
sorted_ops
;
sorted_ops
.
reserve
(
ops
->
size
());
for
(
size_t
i
=
0
;
i
<
ops
->
size
();
++
i
)
{
sorted_ops
.
emplace_back
(
ops
->
at
(
param_sort_idx
[
i
]));
}
std
::
swap
(
*
ops
,
sorted_ops
);
}
void
FuseOptimizerOpPass
::
GetSpecifiedOpsAndVars
(
const
std
::
string
&
op_type
,
const
std
::
vector
<
std
::
string
>
&
aux_vars_name
,
ir
::
Node
*
node
,
std
::
vector
<
ir
::
Node
*>
*
ops
,
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
*
aux_args_name
)
const
{
if
(
node
->
Op
()
->
Type
()
!=
op_type
)
return
;
for
(
auto
&
var_n
:
aux_vars_name
)
{
auto
arg_names
=
node
->
Op
()
->
Input
(
var_n
);
PADDLE_ENFORCE_EQ
(
arg_names
.
size
(),
static_cast
<
size_t
>
(
1
));
(
*
aux_args_name
)[
var_n
].
emplace_back
(
arg_names
[
0
]);
VLOG
(
10
)
<<
var_n
<<
", "
<<
arg_names
[
0
];
}
ops
->
emplace_back
(
node
);
}
void
FuseOptimizerOpPass
::
AppendAllocContinuousSpace
(
const
std
::
vector
<
std
::
string
>
&
args
,
const
std
::
string
&
out_arg
,
bool
copy_data
,
BlockDesc
*
global_block
)
const
{
auto
op_desc
=
global_block
->
AppendOp
();
op_desc
->
SetType
(
"alloc_continuous_space"
);
op_desc
->
SetInput
(
"Input"
,
args
);
op_desc
->
SetOutput
(
"Output"
,
args
);
op_desc
->
SetOutput
(
"FusedOutput"
,
{
out_arg
});
op_desc
->
SetAttr
(
"copy_data"
,
copy_data
);
op_desc
->
SetAttr
(
"check_name"
,
true
);
}
void
FuseOptimizerOpPass
::
InserInputAndOutputForOptOps
(
const
std
::
vector
<
ir
::
Node
*>
&
opt_ops
,
ir
::
Node
*
opt_node
)
const
{
std
::
unordered_set
<
ir
::
Node
*>
inputs
;
std
::
unordered_set
<
ir
::
Node
*>
outputs
;
for
(
auto
opt_op
:
opt_ops
)
{
// set inputs
inputs
.
insert
(
opt_op
->
inputs
.
begin
(),
opt_op
->
inputs
.
end
());
for
(
auto
&
input
:
opt_op
->
inputs
)
{
replace
(
input
->
outputs
.
begin
(),
input
->
outputs
.
end
(),
opt_op
,
opt_node
);
}
// set outputs
outputs
.
insert
(
opt_op
->
outputs
.
begin
(),
opt_op
->
outputs
.
end
());
for
(
auto
&
output
:
opt_op
->
outputs
)
{
replace
(
output
->
inputs
.
begin
(),
output
->
inputs
.
end
(),
opt_op
,
opt_node
);
}
}
opt_node
->
inputs
.
insert
(
opt_node
->
inputs
.
begin
(),
inputs
.
begin
(),
inputs
.
end
());
opt_node
->
outputs
.
insert
(
opt_node
->
outputs
.
begin
(),
outputs
.
begin
(),
outputs
.
end
());
}
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/fuse_optimizer_op_pass.h
0 → 100644
浏览文件 @
32146857
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/details/build_strategy.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/ir/graph.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
class
FuseOptimizerOpPass
:
public
ir
::
Pass
{
protected:
void
ApplyImpl
(
ir
::
Graph
*
graph
)
const
override
;
protected:
virtual
void
SortParametersAndAuxVars
(
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
&
params_grads
,
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
*
aux_var_set
,
std
::
vector
<
ir
::
Node
*>
*
ops
)
const
;
void
InserInputAndOutputForOptOps
(
const
std
::
vector
<
ir
::
Node
*>
&
opt_ops
,
ir
::
Node
*
opt_node
)
const
;
private:
virtual
const
std
::
string
GetOpType
()
const
=
0
;
virtual
const
std
::
vector
<
std
::
string
>
GetAuxiliaryVarNames
()
const
=
0
;
virtual
void
FuseOptimizerOps
(
const
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
&
vars_set
,
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>
&
fused_vars_name
,
const
std
::
vector
<
ir
::
Node
*>
&
adam_ops
,
ir
::
Graph
*
graph
)
const
=
0
;
void
GetSpecifiedOpsAndVars
(
const
std
::
string
&
op_type
,
const
std
::
vector
<
std
::
string
>
&
aux_vars_name
,
ir
::
Node
*
node
,
std
::
vector
<
ir
::
Node
*>
*
ops
,
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
*
aux_args_name
)
const
;
void
AppendAllocContinuousSpace
(
const
std
::
vector
<
std
::
string
>
&
args
,
const
std
::
string
&
out_arg
,
bool
copy_data
,
BlockDesc
*
global_block
)
const
;
void
InitFusedVarsAndAllocSpaceForVars
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
std
::
string
>
&
aux_var_names
,
const
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
&
aux_var_set
,
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>
&
fused_vars_name
)
const
;
};
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/fuse_sgd_op_pass.cc
0 → 100644
浏览文件 @
32146857
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/details/fuse_sgd_op_pass.h"
#include <algorithm>
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/op_registry.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
const
std
::
string
FuseSgdOpPass
::
GetOpType
()
const
{
return
"sgd"
;
}
const
std
::
vector
<
std
::
string
>
FuseSgdOpPass
::
GetAuxiliaryVarNames
()
const
{
return
{
"Param"
};
}
void
FuseSgdOpPass
::
FuseOptimizerOps
(
const
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
&
aux_var_set
,
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>
&
fused_vars_name
,
const
std
::
vector
<
ir
::
Node
*>
&
sgd_ops
,
ir
::
Graph
*
graph
)
const
{
FuseSgdOps
(
aux_var_set
,
fused_vars_name
,
sgd_ops
,
graph
);
}
void
FuseSgdOpPass
::
FuseSgdOps
(
const
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
&
vars_set
,
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>
&
fused_vars_name
,
const
std
::
vector
<
ir
::
Node
*>
&
sgd_ops
,
ir
::
Graph
*
graph
)
const
{
PADDLE_ENFORCE_GT
(
sgd_ops
.
size
(),
static_cast
<
size_t
>
(
0
));
// NOTE: fused_var is only exist in scope, so the graph doesn't have fused_var
// node.
int
op_role
=
boost
::
get
<
int
>
(
sgd_ops
[
0
]
->
Op
()
->
GetAttr
(
OpProtoAndCheckerMaker
::
OpRoleAttrName
()));
VLOG
(
10
)
<<
"Insert sgd to graph "
;
// Add fused scale
OpDesc
Sgd_desc
(
sgd_ops
[
0
]
->
Op
()
->
Block
());
Sgd_desc
.
SetType
(
"sgd"
);
Sgd_desc
.
SetInput
(
"Param"
,
{
fused_vars_name
.
at
(
"Param"
)});
Sgd_desc
.
SetInput
(
"Grad"
,
{
fused_vars_name
.
at
(
"Grad"
)});
Sgd_desc
.
SetOutput
(
"ParamOut"
,
{
fused_vars_name
.
at
(
"Param"
)});
// TODO(zcd): The LearningRate, Beta1Pow, Beta2Pow should be equal.
Sgd_desc
.
SetInput
(
"LearningRate"
,
sgd_ops
[
0
]
->
Op
()
->
Input
(
"LearningRate"
));
// NOTE: multi_devices_pass requires that every op should have a role.
Sgd_desc
.
SetAttr
(
OpProtoAndCheckerMaker
::
OpRoleAttrName
(),
op_role
);
auto
sgd_node
=
graph
->
CreateOpNode
(
&
Sgd_desc
);
InserInputAndOutputForOptOps
(
sgd_ops
,
sgd_node
);
}
}
// namespace details
}
// namespace framework
}
// namespace paddle
REGISTER_PASS
(
fuse_sgd_op_pass
,
paddle
::
framework
::
details
::
FuseSgdOpPass
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kPlaces
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kLocalScopes
);
paddle/fluid/framework/details/fuse_sgd_op_pass.h
0 → 100644
浏览文件 @
32146857
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/details/build_strategy.h"
#include "paddle/fluid/framework/details/fuse_optimizer_op_pass.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/ir/graph.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
class
FuseSgdOpPass
:
public
FuseOptimizerOpPass
{
private:
virtual
const
std
::
string
GetOpType
()
const
;
virtual
const
std
::
vector
<
std
::
string
>
GetAuxiliaryVarNames
()
const
;
// Fuse Sgd Ops
virtual
void
FuseOptimizerOps
(
const
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
&
vars_set
,
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>
&
fused_vars_name
,
const
std
::
vector
<
ir
::
Node
*>
&
sgd_ops
,
ir
::
Graph
*
graph
)
const
;
void
FuseSgdOps
(
const
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
&
vars_set
,
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>
&
fused_vars_name
,
const
std
::
vector
<
ir
::
Node
*>
&
sgd_ops
,
ir
::
Graph
*
graph
)
const
;
};
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/fused_all_reduce_op_handle.cc
浏览文件 @
32146857
...
...
@@ -24,6 +24,19 @@ namespace paddle {
namespace
framework
{
namespace
details
{
// Note(zcd): Addresses should be aligned, otherwise, the results may have
// diff.
static
size_t
Alignment
(
size_t
size
,
const
platform
::
Place
&
place
)
{
// Allow to allocate the minimum chunk size is 4 KB.
size_t
alignment
=
1
<<
12
;
if
(
platform
::
is_gpu_place
(
place
))
{
// Allow to allocate the minimum chunk size is 256 B.
alignment
=
1
<<
8
;
}
size_t
remaining
=
size
%
alignment
;
return
remaining
==
0
?
size
:
size
+
(
alignment
-
remaining
);
}
typedef
std
::
vector
<
std
::
vector
<
std
::
pair
<
std
::
string
,
const
LoDTensor
*>>>
GradientAndLoDTensor
;
...
...
@@ -111,10 +124,11 @@ void FusedAllReduceOpHandle::RunImpl() {
return
grad1
.
second
->
data
<
void
>
()
<
grad2
.
second
->
data
<
void
>
();
});
size_t
size_of_dtype
=
framework
::
SizeOfType
(
dtype
);
for
(
size_t
k
=
1
;
k
<
g_tensor
.
size
();
++
k
)
{
const
void
*
cur_address
=
g_tensor
.
at
(
k
-
1
).
second
->
data
<
void
>
();
int64_t
len
=
g_tensor
.
at
(
k
-
1
).
second
->
numel
();
auto
offset
=
len
*
framework
::
SizeOfType
(
dtype
);
auto
offset
=
Alignment
(
len
*
size_of_dtype
,
places_
[
0
]
);
void
*
infer_next_address
=
reinterpret_cast
<
void
*>
(
reinterpret_cast
<
uintptr_t
>
(
cur_address
)
+
offset
);
const
void
*
next_address
=
g_tensor
.
at
(
k
).
second
->
data
<
void
>
();
...
...
@@ -228,18 +242,21 @@ void FusedAllReduceOpHandle::GetDTypeAndNumel(
const
std
::
vector
<
std
::
pair
<
std
::
string
,
const
LoDTensor
*>>
&
grad_tensor
,
proto
::
VarType
::
Type
*
dtype
,
int64_t
*
numel
)
const
{
*
numel
=
0
;
size_t
size_of_dtype
=
0
;
for
(
size_t
i
=
0
;
i
<
grad_tensor
.
size
();
++
i
)
{
// Get element number
int64_t
len
=
grad_tensor
.
at
(
i
).
second
->
numel
();
PADDLE_ENFORCE_GT
(
len
,
0
);
*
numel
+=
len
;
// Get dtype
auto
ele_type
=
grad_tensor
.
at
(
i
).
second
->
type
();
if
(
i
==
0
)
{
*
dtype
=
ele_type
;
size_of_dtype
=
framework
::
SizeOfType
(
ele_type
);
}
PADDLE_ENFORCE_EQ
(
ele_type
,
*
dtype
);
// Get element number
int64_t
len
=
grad_tensor
.
at
(
i
).
second
->
numel
();
PADDLE_ENFORCE_GT
(
len
,
0
);
// Alignment(len)
*
numel
+=
Alignment
(
len
*
size_of_dtype
,
places_
[
0
])
/
size_of_dtype
;
}
}
...
...
paddle/fluid/framework/details/multi_devices_graph_pass.h
浏览文件 @
32146857
...
...
@@ -20,7 +20,6 @@
#include <unordered_set>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/details/build_strategy.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/ir/graph.h"
...
...
@@ -34,6 +33,10 @@ namespace framework {
class
Scope
;
namespace
details
{
constexpr
char
kLossVarName
[]
=
"loss_var_name"
;
constexpr
char
kStrategy
[]
=
"strategy"
;
constexpr
char
kNRanks
[]
=
"nranks"
;
class
MultiDevSSAGraphBuilderBase
:
public
ir
::
Pass
{
protected:
void
ApplyImpl
(
ir
::
Graph
*
graph
)
const
override
;
...
...
paddle/fluid/framework/details/multi_devices_helper.h
浏览文件 @
32146857
...
...
@@ -20,7 +20,6 @@
#include <unordered_set>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/details/var_handle.h"
...
...
@@ -41,22 +40,25 @@ namespace details {
// `std::vector<VarHandle*>` is the version of varaibles.
typedef
std
::
vector
<
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
VarHandle
*>>>
GraphVars
;
const
char
kGraphVars
[]
=
"vars"
;
// aux variables to represent dependency. Useful to resolve data hazard.
typedef
std
::
unordered_set
<
VarHandleBase
*>
GraphDepVars
;
const
char
kGraphDepVars
[]
=
"dep_vars"
;
constexpr
char
kGraphVars
[]
=
"vars"
;
constexpr
char
kNCCLCtxs
[]
=
"nccl_ctxs"
;
constexpr
char
kLossVarName
[]
=
"loss_var_name"
;
constexpr
char
kPlaces
[]
=
"places"
;
constexpr
char
kLocalScopes
[]
=
"local_scopes"
;
constexpr
char
kStrategy
[]
=
"strategy"
;
constexpr
char
kNRanks
[]
=
"nranks"
;
constexpr
char
kNCCLCtxs
[]
=
"nccl_ctxs"
;
// aux variables to represent dependency. Useful to resolve data hazard.
typedef
std
::
unordered_set
<
VarHandleBase
*>
GraphDepVars
;
constexpr
char
kGraphDepVars
[]
=
"dep_vars"
;
typedef
std
::
unordered_set
<
std
::
string
>
FusedVars
;
constexpr
char
kFusedVars
[]
=
"fused_vars"
;
constexpr
char
kFusedVarNamePrefix
[]
=
"@FUSEDVAR@"
;
typedef
std
::
string
FusedOptType
;
constexpr
char
kFusedOptType
[]
=
"fused_opt_type"
;
typedef
std
::
string
FusedGrads
;
constexpr
char
kFusedGrads
[]
=
"fused_gradients"
;
typedef
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
ParamsAndGrads
;
constexpr
char
kParamsAndGrads
[]
=
"params_grads"
;
...
...
@@ -65,8 +67,6 @@ typedef std::vector<std::vector<std::pair<std::string, std::string>>>
GroupGradsAndParams
;
constexpr
char
kGroupGradsAndParams
[]
=
"group_grads_params"
;
constexpr
char
kFusedVarNamePrefix
[]
=
"@FUSEDVAR@"
;
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
浏览文件 @
32146857
...
...
@@ -24,13 +24,13 @@ ThreadedSSAGraphExecutor::ThreadedSSAGraphExecutor(
const
ExecutionStrategy
&
strategy
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
ir
::
Graph
*
graph
)
:
graph_
(
graph
),
pool_
(
strategy
.
num_threads_
>=
2
?
new
::
ThreadPool
(
strategy
.
num_threads_
)
:
nullptr
),
prepare_pool_
(
1
),
local_scopes_
(
local_scopes
),
places_
(
places
),
fetch_ctxs_
(
places
),
strategy_
(
strategy
)
{
strategy_
(
strategy
),
prepare_pool_
(
1
),
pool_
(
strategy
.
num_threads_
>=
2
?
new
::
ThreadPool
(
strategy
.
num_threads_
)
:
nullptr
)
{
PrepareOpDeps
();
CopyOpDeps
();
}
...
...
paddle/fluid/framework/details/threaded_ssa_graph_executor.h
浏览文件 @
32146857
...
...
@@ -63,13 +63,20 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
details
::
OpHandleBase
*
op
);
private:
// Note(zcd): the ThreadPool should be placed last so that ThreadPool should
// be destroyed first.
ir
::
Graph
*
graph_
;
std
::
unique_ptr
<::
ThreadPool
>
pool_
;
::
ThreadPool
prepare_pool_
;
std
::
vector
<
Scope
*>
local_scopes_
;
std
::
vector
<
platform
::
Place
>
places_
;
platform
::
DeviceContextPool
fetch_ctxs_
;
ExceptionHolder
exception_holder_
;
std
::
unique_ptr
<
OpDependentData
>
op_deps_
;
std
::
future
<
std
::
unique_ptr
<
OpDependentData
>>
op_deps_futures_
;
ExecutionStrategy
strategy_
;
// use std::list because clear(), push_back, and for_each are O(1)
std
::
list
<
std
::
future
<
void
>>
run_op_futures_
;
::
ThreadPool
prepare_pool_
;
std
::
unique_ptr
<::
ThreadPool
>
pool_
;
void
InsertPendingOp
(
std
::
unordered_map
<
OpHandleBase
*
,
size_t
>
*
pending_ops
,
OpHandleBase
*
op_instance
)
const
;
...
...
@@ -88,14 +95,6 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
void
PrepareOpDeps
();
void
CopyOpDeps
();
private:
std
::
future
<
std
::
unique_ptr
<
OpDependentData
>>
op_deps_futures_
;
ExecutionStrategy
strategy_
;
std
::
unique_ptr
<
OpDependentData
>
op_deps_
;
// use std::list because clear(), push_back, and for_each are O(1)
std
::
list
<
std
::
future
<
void
>>
run_op_futures_
;
};
}
// namespace details
...
...
paddle/fluid/framework/operator.h
浏览文件 @
32146857
...
...
@@ -365,6 +365,9 @@ class ExecutionContext {
auto
shared_allocation
=
std
::
shared_ptr
<
memory
::
allocation
::
Allocation
>
(
allocation_ptr
,
deleter
);
PADDLE_ENFORCE
(
dynamic_cast
<
platform
::
TemporaryAllocation
*>
(
allocation_ptr
)
!=
nullptr
,
"The AllocationPtr must be TemporaryAllocation."
);
PADDLE_ENFORCE_GE
(
allocation_ptr
->
size
(),
framework
::
product
(
dim
)
*
sizeof
(
T
));
...
...
paddle/fluid/framework/tensor.cc
浏览文件 @
32146857
...
...
@@ -70,7 +70,7 @@ Tensor& Tensor::ShareDataWith(const Tensor& src) {
return
*
this
;
}
Tensor
Tensor
::
Slice
(
int
begin_idx
,
in
t
end_idx
)
const
{
Tensor
Tensor
::
Slice
(
int
64_t
begin_idx
,
int64_
t
end_idx
)
const
{
check_memory_size
();
PADDLE_ENFORCE_GE
(
begin_idx
,
0
,
"The start row index must be greater than 0."
);
...
...
paddle/fluid/framework/tensor.h
浏览文件 @
32146857
...
...
@@ -18,6 +18,7 @@ limitations under the License. */
#include <cstring>
#include <memory>
#include <typeindex>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/data_layout.h"
#include "paddle/fluid/framework/ddim.h"
...
...
@@ -27,10 +28,6 @@ limitations under the License. */
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_utils.h"
#endif
namespace
paddle
{
namespace
framework
{
...
...
@@ -41,34 +38,10 @@ class Tensor {
#ifdef PADDLE_WITH_MKLDNN
public:
// TODO(jczaja): This is depracted and will be removed
inline
mkldnn
::
memory
::
format
format
()
const
{
if
(
layout_
==
DataLayout
::
kMKLDNN
)
{
return
static_cast
<
mkldnn
::
memory
::
format
>
(
mem_pd_
.
desc
().
data
.
format
);
}
else
{
return
mkldnn
::
memory
::
format
::
format_undef
;
}
}
inline
mkldnn
::
memory
::
format
format
()
const
{
return
format_
;
}
// TODO(jczaja): This is depracted and will be removed
inline
void
set_format
(
const
mkldnn
::
memory
::
format
fmt
,
mkldnn
::
memory
::
data_type
data_type
=
mkldnn
::
memory
::
f32
)
{
mem_pd_
=
paddle
::
platform
::
create_prim_desc_from_format
(
paddle
::
framework
::
vectorize2int
(
dims
()),
fmt
,
data_type
);
layout_
=
DataLayout
::
kMKLDNN
;
}
inline
mkldnn
::
memory
::
primitive_desc
get_mkldnn_prim_desc
()
const
{
return
mem_pd_
;
}
inline
void
set_mkldnn_prim_desc
(
const
mkldnn
::
memory
::
primitive_desc
&
mem_pd
)
{
// Internally MKL-DNN is just copying (increasing reference counter)
// to shared_ptr. So asignment should be quite cheap
mem_pd_
=
mem_pd
;
layout_
=
DataLayout
::
kMKLDNN
;
inline
void
set_format
(
const
mkldnn
::
memory
::
format
format
)
{
format_
=
format
;
}
protected:
...
...
@@ -76,9 +49,12 @@ class Tensor {
* @brief the detail format of memory block which have layout as kMKLDNN
*
* @note MKLDNN lib support various memory format like nchw, nhwc, nChw8C,
* nChw16c, etc. For a MKLDNN memory block, we store memory descriptor
* nChw16c, etc. For a MKLDNN memory block, layout will be set as
* DataLayout::kMKLDNN meanwhile detail memory format will be kept in
* this field.
*/
mutable
mkldnn
::
memory
::
primitive_desc
mem_pd_
;
mkldnn
::
memory
::
format
format_
=
mkldnn
::
memory
::
format
::
format_undef
;
#endif
public:
...
...
@@ -157,7 +133,7 @@ class Tensor {
* @param[in] end_idx The index of the end row(exclusive) to slice.
* The index number begins from 0.
*/
Tensor
Slice
(
int
begin_idx
,
in
t
end_idx
)
const
;
Tensor
Slice
(
int
64_t
begin_idx
,
int64_
t
end_idx
)
const
;
platform
::
Place
place
()
const
{
PADDLE_ENFORCE_NOT_NULL
(
...
...
paddle/fluid/framework/tensor_util.cc
浏览文件 @
32146857
...
...
@@ -44,11 +44,6 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
<<
dst_place
;
return
;
}
#ifdef PADDLE_WITH_MKLDNN
if
(
src
.
layout
()
==
DataLayout
::
kMKLDNN
)
{
dst
->
set_mkldnn_prim_desc
(
src
.
get_mkldnn_prim_desc
());
}
#endif
memory
::
Copy
(
boost
::
get
<
platform
::
CPUPlace
>
(
dst_place
),
dst_ptr
,
boost
::
get
<
platform
::
CPUPlace
>
(
src_place
),
src_ptr
,
size
);
}
...
...
paddle/fluid/memory/allocation/CMakeLists.txt
浏览文件 @
32146857
...
...
@@ -4,7 +4,6 @@ cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator)
cc_library
(
locked_allocator SRCS locked_allocator.cc DEPS allocator
)
cc_library
(
buffered_allocator SRCS buffered_allocator.cc DEPS allocator
)
cc_library
(
legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator profiler
)
cc_library
(
zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator
)
cc_test
(
buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator
)
if
(
WITH_GPU
)
...
...
@@ -38,20 +37,30 @@ else ()
set
(
AllocatorFacadeDeps
)
endif
()
list
(
APPEND AllocatorFacadeDeps cpu_allocator locked_allocator best_fit_allocator aligned_allocator auto_increment_allocator conditional_allocator retry_allocator buffered_allocator legacy_allocator zero_size_allocator
)
cc_library
(
aligned_allocator SRCS aligned_allocator.cc DEPS allocator
)
cc_library
(
auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator
)
cc_library
(
zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator
)
cc_library
(
conditional_allocator SRCS conditional_allocator.cc DEPS allocator
)
cc_library
(
allocator_strategy SRCS allocator_strategy.cc DEPS gflags
${
AllocatorFacadeDeps
}
)
cc_library
(
allocator_facade SRCS allocator_facade.cc DEPS allocator_strategy
)
cc_library
(
allocator_strategy SRCS allocator_strategy.cc DEPS gflags
)
cc_library
(
allocator_facade SRCS allocator_facade.cc DEPS
${
AllocatorFacadeDeps
}
cpu_allocator
locked_allocator
best_fit_allocator
aligned_allocator
auto_increment_allocator
zero_size_allocator
conditional_allocator
retry_allocator
buffered_allocator
allocator_strategy
legacy_allocator
)
nv_test
(
allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade
)
cc_test
(
retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator
)
cc_test
(
naive_best_fit_allocator_facade_test SRCS naive_best_fit_allocator_facade_test.cc DEPS allocator_facade
)
cc_test
(
allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc DEPS allocator_facade
)
cc_test
(
allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade
)
paddle/fluid/memory/allocation/aligned_allocator.h
浏览文件 @
32146857
...
...
@@ -94,8 +94,6 @@ class AlignedAllocator : public ThinAlignedAllocator {
underlying_allocator_
->
Allocate
(
size
+
kAlignment
,
attr
);
return
new
AlignedAllocation
<
kAlignment
>
(
std
::
move
(
raw_allocation
),
size
);
}
void
FreeImpl
(
Allocation
*
allocation
)
override
{
delete
allocation
;
}
};
}
// namespace allocation
...
...
paddle/fluid/memory/allocation/allocator.cc
浏览文件 @
32146857
...
...
@@ -27,24 +27,16 @@ bool Allocator::IsAllocThreadSafe() const { return false; }
AllocationPtr
Allocator
::
Allocate
(
size_t
size
,
Allocator
::
Attr
attr
)
{
auto
ptr
=
AllocateImpl
(
size
,
attr
);
ptr
->
RegisterDecoratedA
llocator
(
this
);
ptr
->
set_a
llocator
(
this
);
return
AllocationPtr
(
ptr
);
}
void
Allocator
::
FreeImpl
(
Allocation
*
allocation
)
{
Allocator
*
allocator
=
allocation
->
TopDecoratedAllocator
();
allocator
->
Free
(
allocation
);
}
void
Allocator
::
Free
(
Allocation
*
allocation
)
{
allocation
->
PopDecoratedAllocator
();
FreeImpl
(
allocation
);
}
void
Allocator
::
Free
(
Allocation
*
allocation
)
{
delete
allocation
;
}
const
char
*
BadAlloc
::
what
()
const
noexcept
{
return
msg_
.
c_str
();
}
void
AllocationDeleter
::
operator
()(
Allocation
*
allocation
)
const
{
Allocator
*
allocator
=
allocation
->
TopDecoratedA
llocator
();
auto
*
allocator
=
allocation
->
a
llocator
();
allocator
->
Free
(
allocation
);
}
...
...
paddle/fluid/memory/allocation/allocator.h
浏览文件 @
32146857
...
...
@@ -46,56 +46,13 @@ class Allocator;
// NOTE: this is the base class of Allocation. Each allocator can use its own
// allocation object.
// NOTE: the `Allocation::ptr()` could be nullptr, if the allocation size is 0
/**
* Allocation is returned by Allocator::Allocate() method.
*
* An allocator may be decorated by another allocator. For example, we can
* decorate
* a RetryAllocator to any allocator to perform allocation retry when first
* allocation request fails.
*
* Explanations of Allocator design is as follows:
*
* Suppose we have an allocator which is decorated by several allocators:
*
* A(1) <- A(2) <- A(3) <- ... <- A(n)
*
* , and the public allocator is A(1).
*
* The allocation process would be:
*
* A(n).Allocate() -> ... -> A(2).Allocate() -> A(1).Allocate()
*
* , and the free process would be:
*
* A(1).Free() -> A(2).Free() -> ... -> A(n).Free()
*
* Therefore, we should record the allocator chain when allocating, so
* that we can free the allocation in the reverse order of allocator chain.
* The field `decorated_allocators_` is used to record this chain.
*
* Another example is that we want to add additional fields in Allocation,
* e.g., something what is done in AlignedAllocator, etc.
* In this case, we should declare a derived class of Allocation, which
* contains an underlying Allocation allocated by the underlying allocator.
* Therefore, `decorated_allocators_` of the new Allocation object would
* be a new chain, differing from the underlying Allocation object.
*/
class
Allocation
{
public:
Allocation
(
void
*
ptr
,
size_t
size
,
platform
::
Place
place
)
:
ptr_
(
ptr
),
size_
(
size
),
place_
(
place
)
{
// NOTE(zjl): Since decorated_allocators_ is usually a small vector
// We reserve a small buffer to it to prevent frequent heap allocation
// Not quite sure whether we need something like gtl vector.
decorated_allocators_
.
reserve
(
8
);
}
:
allocator_
(
nullptr
),
ptr_
(
ptr
),
size_
(
size
),
place_
(
place
)
{}
Allocation
(
const
Allocation
&
o
)
=
delete
;
Allocation
&
operator
=
(
const
Allocation
&
o
)
=
delete
;
Allocation
(
Allocation
&&
o
)
=
delete
;
Allocation
&
operator
=
(
Allocation
&&
o
)
=
delete
;
// Returns the holding pointer.
// NOTE: For performance consideration, it is better not to make this method
...
...
@@ -117,31 +74,17 @@ class Allocation {
const
platform
::
Place
&
place
()
const
{
return
place_
;
}
virtual
~
Allocation
();
private:
const
std
::
vector
<
Allocator
*>&
DecoratedAllocators
()
const
{
return
decorated_allocators_
;
}
inline
void
RegisterDecoratedAllocator
(
Allocator
*
allocator
)
{
decorated_allocators_
.
push_back
(
allocator
);
}
Allocator
*
allocator
()
{
return
allocator_
;
}
inline
void
PopDecoratedAllocator
()
{
decorated_allocators_
.
pop_back
()
;
}
void
set_allocator
(
Allocator
*
allocator
)
{
allocator_
=
allocator
;
}
inline
Allocator
*
TopDecoratedAllocator
()
{
return
decorated_allocators_
.
back
();
}
virtual
~
Allocation
();
private:
Allocator
*
allocator_
;
void
*
ptr_
;
size_t
size_
;
platform
::
Place
place_
;
std
::
vector
<
Allocator
*>
decorated_allocators_
;
friend
class
Allocator
;
friend
class
AllocationDeleter
;
};
using
AllocationPtr
=
std
::
unique_ptr
<
Allocation
,
AllocationDeleter
>
;
...
...
@@ -191,12 +134,9 @@ class Allocator {
// True if the `Allocate` is thread safe.
virtual
bool
IsAllocThreadSafe
()
const
;
// This function should not be called outside
void
Free
(
Allocation
*
allocation
);
protected:
virtual
void
Free
(
Allocation
*
allocation
);
virtual
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
=
0
;
virtual
void
FreeImpl
(
Allocation
*
allocation
);
private:
friend
class
AllocationDeleter
;
...
...
paddle/fluid/memory/allocation/allocator_facade.cc
浏览文件 @
32146857
...
...
@@ -49,17 +49,6 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
static
inline
std
::
shared_ptr
<
Allocator
>
WrapRetryAllocator
(
std
::
shared_ptr
<
Allocator
>
allocator
,
int64_t
retry_time
)
{
if
(
retry_time
>
0
)
{
auto
*
retry_allocator
=
new
RetryAllocator
(
std
::
move
(
allocator
),
retry_time
);
allocator
.
reset
(
retry_allocator
);
}
return
allocator
;
}
// TODO(yy): Dirty code here. This class should be configurable in runtime.
class
CPUManagedAllocator
:
public
Allocator
{
public:
...
...
@@ -123,10 +112,14 @@ class ChunkedAllocator : public Allocator {
std
::
shared_ptr
<
Allocator
>
CreateAllocatorWithChunk
()
{
chunks_
.
emplace_back
(
raw_allocator_
->
Allocate
(
max_chunk_size_
));
auto
*
allocation
=
chunks_
.
back
().
get
();
std
::
shared
_ptr
<
Allocator
>
allocator
(
new
LockedAllocator
(
std
::
shared
_ptr
<
Allocator
>
(
new
BestFitAllocator
(
allocation
))));
std
::
unique
_ptr
<
Allocator
>
allocator
(
new
LockedAllocator
(
std
::
unique
_ptr
<
Allocator
>
(
new
BestFitAllocator
(
allocation
))));
allocator
=
WrapRetryAllocator
(
allocator
,
retry_time_
);
if
(
retry_time_
>
0
)
{
auto
*
retry_allocator
=
new
RetryAllocator
(
std
::
move
(
allocator
),
retry_time_
);
allocator
.
reset
(
retry_allocator
);
}
return
std
::
make_shared
<
AlignedAllocator
<
64u
>>
(
std
::
move
(
allocator
));
}
...
...
@@ -197,23 +190,13 @@ class AllocatorFacadePrivate {
~
AllocatorFacadePrivate
()
=
default
;
AllocatorFacadePrivate
()
{
auto
strategy
=
GetAllocatorStrategy
();
switch
(
strategy
)
{
case
AllocatorStrategy
::
kLegacy
:
{
if
(
GetAllocatorStrategy
()
==
AllocatorStrategy
::
kLegacy
)
{
InitLegacyAllocator
();
break
;
}
case
AllocatorStrategy
::
kNaiveBestFit
:
{
}
else
{
InitCPUAllocator
();
InitCUDAAllocator
();
InitCUDAPinnedAllocator
();
WrapZeroSizeAllocator
();
break
;
}
default:
{
PADDLE_THROW
(
"Unsupported allocator strategy: %d"
,
static_cast
<
int
>
(
strategy
));
}
}
}
...
...
@@ -271,7 +254,8 @@ AllocatorFacade& AllocatorFacade::Instance() {
std
::
shared_ptr
<
Allocation
>
AllocatorFacade
::
AllocShared
(
const
platform
::
Place
&
place
,
size_t
size
,
Allocator
::
Attr
attr
)
{
return
std
::
shared_ptr
<
Allocation
>
(
Alloc
(
place
,
size
,
attr
));
return
std
::
shared_ptr
<
Allocation
>
(
Alloc
(
place
,
size
,
attr
).
release
(),
AllocationDeleter
());
}
AllocationPtr
AllocatorFacade
::
Alloc
(
const
platform
::
Place
&
place
,
size_t
size
,
...
...
paddle/fluid/memory/allocation/allocator_strategy.cc
浏览文件 @
32146857
...
...
@@ -19,22 +19,16 @@
DEFINE_string
(
allocator_strategy
,
"legacy"
,
"The allocation strategy. Legacy means the original allocator of Fluid."
"naive_best_fit means the experimental best fit allocator. "
"allocator. Enum in [legacy, naive_best_fit]."
);
"New means the experimental allocators of Fluid. in [legacy, new]"
);
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
static
AllocatorStrategy
GetStrategyFromFlag
()
{
if
(
FLAGS_allocator_strategy
==
"legacy"
)
{
return
AllocatorStrategy
::
kLegacy
;
}
else
if
(
FLAGS_allocator_strategy
==
"naive_best_fit"
)
{
return
AllocatorStrategy
::
kNaiveBestFit
;
}
else
{
PADDLE_THROW
(
"Unsupported allocator strategy: %s"
,
FLAGS_allocator_strategy
);
}
return
FLAGS_allocator_strategy
==
"legacy"
?
AllocatorStrategy
::
kLegacy
:
AllocatorStrategy
::
kNaiveBestFit
;
}
AllocatorStrategy
GetAllocatorStrategy
()
{
...
...
paddle/fluid/memory/allocation/best_fit_allocator.cc
浏览文件 @
32146857
...
...
@@ -109,7 +109,7 @@ size_t BestFitAllocator::NumFreeChunks() const {
}
return
num
;
}
void
BestFitAllocator
::
Free
Impl
(
Allocation
*
allocation
)
{
void
BestFitAllocator
::
Free
(
Allocation
*
allocation
)
{
auto
*
bf_allocation
=
dynamic_cast
<
BestFitAllocation
*>
(
allocation
);
PADDLE_ENFORCE_NOT_NULL
(
bf_allocation
,
"The input allocation is not BestFitAllocation."
);
...
...
paddle/fluid/memory/allocation/best_fit_allocator.h
浏览文件 @
32146857
...
...
@@ -119,7 +119,7 @@ class BestFitAllocator : public Allocator {
void
InsertFreeNode
(
const
ListIt
&
it
);
protected:
void
Free
Impl
(
Allocation
*
allocation
)
override
;
void
Free
(
Allocation
*
allocation
)
override
;
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
private:
...
...
paddle/fluid/memory/allocation/buffered_allocator.cc
浏览文件 @
32146857
...
...
@@ -22,11 +22,11 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
BufferedAllocator
::
BufferedAllocator
(
std
::
shared_ptr
<
Allocator
>
allocator
)
BufferedAllocator
::
BufferedAllocator
(
std
::
unique_ptr
<
Allocator
>
&&
allocator
)
:
underlying_allocator_
(
std
::
move
(
allocator
))
{
PADDLE_ENFORCE_NOT_NULL
(
underlying_allocator_
,
"Underlying allocator of BufferedAllocator must
not be null
"
);
"Underlying allocator of BufferedAllocator must
be unmanaged
"
);
if
(
underlying_allocator_
->
IsAllocThreadSafe
())
{
mtx_
.
reset
(
new
std
::
mutex
());
}
...
...
@@ -41,19 +41,19 @@ void BufferedAllocator::FreeCache(size_t size) {
while
(
!
allocations_
.
empty
())
{
// free the largest
auto
it
=
--
allocations_
.
end
();
cur
+=
it
->
second
->
size
();
underlying_allocator_
->
Free
(
it
->
second
.
release
()
);
delete
it
->
second
.
release
(
);
allocations_
.
erase
(
it
);
if
(
cur
>=
size
)
return
;
}
}
bool
BufferedAllocator
::
IsAllocThreadSafe
()
const
{
return
mtx_
!=
nullptr
;
}
void
BufferedAllocator
::
FreeImpl
(
Allocation
*
allocation
)
{
bool
BufferedAllocator
::
IsAllocThreadSafe
()
const
{
return
this
->
underlying_allocator_
->
IsAllocThreadSafe
();
}
void
BufferedAllocator
::
Free
(
Allocation
*
allocation
)
{
platform
::
LockGuardPtr
<
std
::
mutex
>
guard
(
mtx_
);
allocations_
.
emplace
(
allocation
->
size
(),
AllocationPtr
(
allocation
));
}
Allocation
*
BufferedAllocator
::
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
{
{
platform
::
LockGuardPtr
<
std
::
mutex
>
guard
(
mtx_
);
...
...
@@ -61,15 +61,17 @@ Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
if
(
it
!=
allocations_
.
end
()
&&
it
->
first
<
size
*
2
)
{
AllocationPtr
result
(
std
::
move
(
it
->
second
));
allocations_
.
erase
(
it
);
return
result
.
release
(
);
return
new
AllocationWithUnderlying
(
std
::
move
(
result
)
);
}
}
try
{
return
underlying_allocator_
->
Allocate
(
size
,
attr
).
release
();
return
new
AllocationWithUnderlying
(
underlying_allocator_
->
Allocate
(
size
,
attr
));
}
catch
(
BadAlloc
&
)
{
FreeCache
(
size
);
return
underlying_allocator_
->
Allocate
(
size
,
attr
).
release
();
return
new
AllocationWithUnderlying
(
underlying_allocator_
->
Allocate
(
size
,
attr
));
}
}
...
...
paddle/fluid/memory/allocation/buffered_allocator.h
浏览文件 @
32146857
...
...
@@ -31,7 +31,7 @@ namespace allocation {
// underlying_allocator_
class
BufferedAllocator
:
public
Allocator
{
public:
explicit
BufferedAllocator
(
std
::
shared_ptr
<
Allocator
>
allocator
);
explicit
BufferedAllocator
(
std
::
unique_ptr
<
Allocator
>
&&
allocator
);
~
BufferedAllocator
();
...
...
@@ -44,11 +44,11 @@ class BufferedAllocator : public Allocator {
void
FreeCache
(
size_t
size
);
protected:
void
Free
Impl
(
Allocation
*
allocation
)
override
;
void
Free
(
Allocation
*
allocation
)
override
;
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
private:
std
::
shared
_ptr
<
Allocator
>
underlying_allocator_
;
std
::
unique
_ptr
<
Allocator
>
underlying_allocator_
;
std
::
multimap
<
size_t
,
AllocationPtr
>
allocations_
;
std
::
unique_ptr
<
std
::
mutex
>
mtx_
;
};
...
...
paddle/fluid/memory/allocation/buffered_allocator_test.cc
浏览文件 @
32146857
...
...
@@ -14,6 +14,7 @@
#include "paddle/fluid/memory/allocation/buffered_allocator.h"
#include <gtest/gtest.h>
#include <memory>
#include <utility>
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
...
...
@@ -65,7 +66,7 @@ class StubAllocator : public Allocator {
size_t
GetFreeCount
()
const
{
return
destruct_count_
;
}
protected:
void
Free
Impl
(
Allocation
*
allocation
)
override
{
void
Free
(
Allocation
*
allocation
)
override
{
auto
*
alloc
=
dynamic_cast
<
StubAllocation
*>
(
allocation
);
PADDLE_ENFORCE_NOT_NULL
(
alloc
);
if
(
alloc
->
ptr
())
delete
[]
static_cast
<
uint8_t
*>
(
alloc
->
ptr
());
...
...
paddle/fluid/memory/allocation/cpu_allocator.cc
浏览文件 @
32146857
...
...
@@ -20,27 +20,25 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
CPUAllocation
::
CPUAllocation
(
void
*
ptr
,
size_t
size
)
:
Allocation
(
ptr
,
size
,
platform
::
CPUPlace
())
{}
bool
CPUAllocator
::
IsAllocThreadSafe
()
const
{
return
true
;
}
void
CPUAllocator
::
FreeImpl
(
Allocation
*
allocation
)
{
void
*
p
=
allocation
->
ptr
();
#ifdef _WIN32
_aligned_free
(
p
);
#else
free
(
p
);
#endif
void
CPUAllocator
::
Free
(
Allocation
*
allocation
)
{
PADDLE_ENFORCE_NOT_NULL
(
dynamic_cast
<
CPUAllocation
*>
(
allocation
));
free
(
allocation
->
ptr
());
delete
allocation
;
}
Allocation
*
CPUAllocator
::
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
{
void
*
p
;
#ifdef _WIN32
p
=
_aligned_malloc
(
size
,
kAlignment
);
#else
PADDLE_ENFORCE_EQ
(
posix_memalign
(
&
p
,
kAlignment
,
size
),
0
,
"Alloc %ld error!"
,
size
);
#endif
return
new
Allocation
(
p
,
size
,
platform
::
CPUPlace
());
void
*
ptr
;
auto
status
=
posix_memalign
(
&
ptr
,
kAlignment
,
size
);
if
(
UNLIKELY
(
status
)
!=
0
)
{
throw
BadAlloc
(
string
::
Sprintf
(
"Cannot allocate cpu memory %d. Errno is %d"
,
size
,
status
));
}
return
new
CPUAllocation
(
ptr
,
size
);
}
}
// namespace allocation
}
// namespace memory
...
...
paddle/fluid/memory/allocation/cpu_allocator.h
浏览文件 @
32146857
...
...
@@ -31,13 +31,19 @@ namespace allocation {
//
// NOTE(yy): It is no need to use `BestFitAllocator` in CPU. We can import
// an open-sourced allocator into Paddle.
class
CPUAllocator
;
class
CPUAllocation
:
public
Allocation
{
public:
CPUAllocation
(
void
*
ptr
,
size_t
size
);
};
class
CPUAllocator
:
public
Allocator
{
public:
constexpr
static
size_t
kAlignment
=
4096UL
;
constexpr
static
size_t
kAlignment
=
64u
;
bool
IsAllocThreadSafe
()
const
override
;
protected:
void
Free
Impl
(
Allocation
*
allocation
)
override
;
void
Free
(
Allocation
*
allocation
)
override
;
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
};
}
// namespace allocation
...
...
paddle/fluid/memory/allocation/cuda_allocator.cc
浏览文件 @
32146857
...
...
@@ -23,14 +23,15 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
bool
CUDAAllocator
::
IsAllocThreadSafe
()
const
{
return
true
;
}
void
CUDAAllocator
::
Free
Impl
(
Allocation
*
allocation
)
{
void
CUDAAllocator
::
Free
(
Allocation
*
allocation
)
{
platform
::
CUDADeviceGuard
guard
(
place_
.
device
);
PADDLE_ENFORCE_EQ
(
boost
::
get
<
platform
::
CUDAPlace
>
(
allocation
->
place
()),
auto
*
cuda_allocation
=
dynamic_cast
<
CUDAAllocation
*>
(
allocation
);
PADDLE_ENFORCE_NOT_NULL
(
cuda_allocation
);
PADDLE_ENFORCE_EQ
(
boost
::
get
<
platform
::
CUDAPlace
>
(
cuda_allocation
->
place
()),
place_
);
PADDLE_ENFORCE
(
cudaFree
(
allocation
->
ptr
()));
delete
allocation
;
}
Allocation
*
CUDAAllocator
::
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
{
platform
::
CUDADeviceGuard
guard
(
place_
.
device
);
void
*
ptr
;
...
...
@@ -40,9 +41,8 @@ Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
"Cannot allocate %d on GPU %d, cuda status %d, %s"
,
size
,
place_
.
device
,
status
,
cudaGetErrorString
(
status
)));
}
return
new
Allocation
(
ptr
,
size
,
platform
::
Place
(
place_
));
return
new
CUDA
Allocation
(
ptr
,
size
,
platform
::
Place
(
place_
));
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/cuda_allocator.h
浏览文件 @
32146857
...
...
@@ -20,6 +20,13 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
// CUDA System allocator and allocation.
// Just a flag type.
class
CUDAAllocation
:
public
Allocation
{
public:
using
Allocation
::
Allocation
;
};
class
CUDAAllocator
:
public
Allocator
{
public:
explicit
CUDAAllocator
(
const
platform
::
CUDAPlace
&
place
)
:
place_
(
place
)
{}
...
...
@@ -28,7 +35,7 @@ class CUDAAllocator : public Allocator {
bool
IsAllocThreadSafe
()
const
override
;
protected:
void
Free
Impl
(
Allocation
*
allocation
)
override
;
void
Free
(
Allocation
*
allocation
)
override
;
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
private:
...
...
paddle/fluid/memory/allocation/legacy_allocator.cc
浏览文件 @
32146857
...
...
@@ -134,22 +134,26 @@ size_t Used<platform::CPUPlace>(const platform::CPUPlace &place) {
}
#ifdef PADDLE_WITH_CUDA
class
GPUBuddyAllocatorList
{
public:
GPUBuddyAllocatorList
()
:
allocators_
(
platform
::
GetCUDADeviceCount
()),
flags_
(
platform
::
GetCUDADeviceCount
())
{
allocation
::
GPUMemMonitor
.
Initialize
(
allocators_
.
size
());
}
BuddyAllocator
*
GetGPUBuddyAllocator
(
int
gpu_id
)
{
static
std
::
once_flag
init_flag
;
static
detail
::
BuddyAllocator
**
a_arr
=
nullptr
;
static
std
::
vector
<
int
>
devices
;
BuddyAllocator
*
Get
(
size_t
dev_id
)
{
PADDLE_ENFORCE
(
dev_id
<
flags_
.
size
(),
"Invalid device id %s"
,
dev_id
);
std
::
call_once
(
flags_
[
dev_id
],
[
this
,
dev_id
]
{
std
::
call_once
(
init_flag
,
[
gpu_id
]()
{
devices
=
platform
::
GetSelectedDevices
();
int
gpu_num
=
devices
.
size
();
allocation
::
GPUMemMonitor
.
Initialize
(
devices
.
size
());
a_arr
=
new
BuddyAllocator
*
[
gpu_num
];
for
(
size_t
i
=
0
;
i
<
devices
.
size
();
++
i
)
{
int
dev_id
=
devices
[
i
];
a_arr
[
i
]
=
nullptr
;
platform
::
SetDeviceId
(
dev_id
);
allocators_
[
dev_id
]
=
new
BuddyAllocator
(
std
::
unique_ptr
<
detail
::
SystemAllocator
>
(
a_arr
[
i
]
=
new
BuddyAllocator
(
std
::
unique_ptr
<
detail
::
SystemAllocator
>
(
new
detail
::
GPUAllocator
(
dev_id
)),
platform
::
GpuMinChunkSize
(),
platform
::
GpuMaxChunkSize
());
platform
::
GpuMinChunkSize
(),
platform
::
GpuMaxChunkSize
());
VLOG
(
10
)
<<
"
\n\n
NOTE:
\n
"
<<
"You can set GFlags environment variable "
...
...
@@ -163,19 +167,13 @@ class GPUBuddyAllocatorList {
<<
FLAGS_initial_gpu_memory_in_mb
<<
". Current 'FLAGS_reallocate_gpu_memory_in_mb' value is "
<<
FLAGS_reallocate_gpu_memory_in_mb
<<
"
\n\n
"
;
});
return
allocators_
[
dev_id
];
}
});
private:
std
::
vector
<
BuddyAllocator
*>
allocators_
;
std
::
vector
<
std
::
once_flag
>
flags_
;
};
BuddyAllocator
*
GetGPUBuddyAllocator
(
int
gpu_id
)
{
static
GPUBuddyAllocatorList
allocators
;
platform
::
SetDeviceId
(
gpu_id
);
return
allocators
.
Get
(
gpu_id
);
auto
pos
=
std
::
distance
(
devices
.
begin
(),
std
::
find
(
devices
.
begin
(),
devices
.
end
(),
gpu_id
));
return
a_arr
[
pos
];
}
#endif
...
...
@@ -194,7 +192,7 @@ void *Alloc<platform::CUDAPlace>(const platform::CUDAPlace &place,
#ifdef PADDLE_WITH_CUDA
auto
*
buddy_allocator
=
GetGPUBuddyAllocator
(
place
.
device
);
auto
*
ptr
=
buddy_allocator
->
Alloc
(
size
);
if
(
ptr
==
nullptr
&&
size
>
0
)
{
if
(
ptr
==
nullptr
)
{
int
cur_dev
=
platform
::
GetCurrentDeviceId
();
platform
::
SetDeviceId
(
place
.
device
);
size_t
avail
,
total
;
...
...
@@ -349,7 +347,7 @@ Allocation *LegacyAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
return
tmp_alloc
;
}
void
LegacyAllocator
::
Free
Impl
(
Allocation
*
allocation
)
{
void
LegacyAllocator
::
Free
(
Allocation
*
allocation
)
{
boost
::
apply_visitor
(
legacy
::
FreeVisitor
(
allocation
->
ptr
(),
allocation
->
size
()),
allocation
->
place
());
...
...
paddle/fluid/memory/allocation/legacy_allocator.h
浏览文件 @
32146857
...
...
@@ -73,7 +73,7 @@ class LegacyAllocator : public Allocator {
protected:
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
void
Free
Impl
(
Allocation
*
allocation
)
override
;
void
Free
(
Allocation
*
allocation
)
override
;
private:
platform
::
Place
place_
;
...
...
paddle/fluid/memory/allocation/locked_allocator.cc
浏览文件 @
32146857
...
...
@@ -17,7 +17,6 @@
#include <utility>
#include "paddle/fluid/memory/allocation/allocation_with_underlying.h"
#include "paddle/fluid/platform/lock_guard_ptr.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
...
...
@@ -25,24 +24,26 @@ namespace allocation {
bool
LockedAllocator
::
IsAllocThreadSafe
()
const
{
return
true
;
}
LockedAllocator
::
LockedAllocator
(
std
::
shared_ptr
<
Allocator
>
underlying_allocator
)
std
::
unique_ptr
<
Allocator
>
&&
underlying_allocator
)
:
underlying_allocator_
(
std
::
move
(
underlying_allocator
))
{
PADDLE_ENFORCE_NOT_NULL
(
underlying_allocator_
);
if
(
!
underlying_allocator_
->
IsAllocThreadSafe
())
{
mtx_
.
reset
(
new
std
::
mutex
());
}
}
void
LockedAllocator
::
FreeImpl
(
Allocation
*
allocation
)
{
void
LockedAllocator
::
Free
(
Allocation
*
allocation
)
{
{
platform
::
LockGuardPtr
<
std
::
mutex
>
guard
(
mtx_
);
underlying_allocator_
->
Free
(
allocation
);
reinterpret_cast
<
AllocationWithUnderlying
*>
(
allocation
)
->
allocation_
.
reset
();
// Destroy inner allocation
}
delete
allocation
;
}
Allocation
*
LockedAllocator
::
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
{
platform
::
LockGuardPtr
<
std
::
mutex
>
guard
(
mtx_
);
return
underlying_allocator_
->
Allocate
(
size
,
attr
).
release
();
return
new
AllocationWithUnderlying
(
underlying_allocator_
->
Allocate
(
size
,
attr
));
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/locked_allocator.h
浏览文件 @
32146857
...
...
@@ -24,15 +24,15 @@ namespace allocation {
// A allocator to make underlying allocator thread safe.
class
LockedAllocator
:
public
Allocator
{
public:
explicit
LockedAllocator
(
std
::
shared_ptr
<
Allocator
>
underlying_allocator
);
explicit
LockedAllocator
(
std
::
unique_ptr
<
Allocator
>
&&
underlying_allocator
);
bool
IsAllocThreadSafe
()
const
override
;
protected:
void
Free
Impl
(
Allocation
*
allocation
)
override
;
void
Free
(
Allocation
*
allocation
)
override
;
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
private:
std
::
shared
_ptr
<
Allocator
>
underlying_allocator_
;
std
::
unique
_ptr
<
Allocator
>
underlying_allocator_
;
std
::
unique_ptr
<
std
::
mutex
>
mtx_
;
};
...
...
paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc
已删除
100644 → 0
浏览文件 @
d980ba19
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#ifdef PADDLE_WITH_CUDA
DECLARE_double
(
fraction_of_gpu_memory_to_use
);
DECLARE_double
(
fraction_of_cuda_pinned_memory_to_use
);
DECLARE_int64
(
gpu_allocator_retry_time
);
#endif
DECLARE_string
(
allocator_strategy
);
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
TEST
(
allocator
,
allocator
)
{
#ifdef PADDLE_WITH_CUDA
FLAGS_fraction_of_gpu_memory_to_use
=
0.01
;
FLAGS_gpu_allocator_retry_time
=
500
;
FLAGS_fraction_of_cuda_pinned_memory_to_use
=
0.5
;
#endif
FLAGS_allocator_strategy
=
"naive_best_fit"
;
auto
&
instance
=
AllocatorFacade
::
Instance
();
platform
::
Place
place
;
size_t
size
=
1024
;
{
place
=
platform
::
CPUPlace
();
size
=
1024
;
auto
cpu_allocation
=
instance
.
Alloc
(
place
,
size
);
ASSERT_NE
(
cpu_allocation
,
nullptr
);
ASSERT_NE
(
cpu_allocation
->
ptr
(),
nullptr
);
ASSERT_EQ
(
cpu_allocation
->
place
(),
place
);
ASSERT_EQ
(
cpu_allocation
->
size
(),
size
);
}
#ifdef PADDLE_WITH_CUDA
{
place
=
platform
::
CUDAPlace
(
0
);
size
=
1024
;
auto
gpu_allocation
=
instance
.
Alloc
(
place
,
size
);
ASSERT_NE
(
gpu_allocation
,
nullptr
);
ASSERT_NE
(
gpu_allocation
->
ptr
(),
nullptr
);
ASSERT_EQ
(
gpu_allocation
->
place
(),
place
);
ASSERT_GE
(
gpu_allocation
->
size
(),
size
);
}
{
// Allocate 2GB gpu memory
place
=
platform
::
CUDAPlace
(
0
);
size
=
2
*
static_cast
<
size_t
>
(
1
<<
30
);
auto
gpu_allocation
=
instance
.
Alloc
(
place
,
size
);
ASSERT_NE
(
gpu_allocation
,
nullptr
);
ASSERT_NE
(
gpu_allocation
->
ptr
(),
nullptr
);
ASSERT_EQ
(
gpu_allocation
->
place
(),
place
);
ASSERT_GE
(
gpu_allocation
->
size
(),
size
);
}
{
place
=
platform
::
CUDAPinnedPlace
();
size
=
(
1
<<
20
);
auto
cuda_pinned_allocation
=
instance
.
Alloc
(
platform
::
CUDAPinnedPlace
(),
1
<<
20
);
ASSERT_NE
(
cuda_pinned_allocation
,
nullptr
);
ASSERT_NE
(
cuda_pinned_allocation
->
ptr
(),
nullptr
);
ASSERT_EQ
(
cuda_pinned_allocation
->
place
(),
place
);
ASSERT_GE
(
cuda_pinned_allocation
->
size
(),
size
);
}
#endif
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/pinned_allocator.cc
浏览文件 @
32146857
...
...
@@ -20,15 +20,20 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
bool
CPUPinnedAllocator
::
IsAllocThreadSafe
()
const
{
return
true
;
}
void
CPUPinnedAllocator
::
FreeImpl
(
Allocation
*
allocation
)
{
void
CPUPinnedAllocator
::
Free
(
Allocation
*
allocation
)
{
PADDLE_ENFORCE_NOT_NULL
(
dynamic_cast
<
CPUPinnedAllocation
*>
(
allocation
));
PADDLE_ENFORCE
(
cudaFreeHost
(
allocation
->
ptr
()));
delete
allocation
;
}
Allocation
*
CPUPinnedAllocator
::
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
{
// PADDLE_ENFORCE_EQ(
// attr, kCrossDevice,
// "CPUPinnedAllocator should be used for Cross-Device Communication");
void
*
ptr
;
PADDLE_ENFORCE
(
cudaHostAlloc
(
&
ptr
,
size
,
cudaHostAllocPortable
));
return
new
Allocation
(
ptr
,
size
,
platform
::
CUDAPinnedPlace
()
);
return
new
CPUPinnedAllocation
(
ptr
,
size
);
}
}
// namespace allocation
}
// namespace memory
...
...
paddle/fluid/memory/allocation/pinned_allocator.h
浏览文件 @
32146857
...
...
@@ -20,12 +20,18 @@ namespace memory {
namespace
allocation
{
// Allocator uses `cudaHostAlloc`
class
CPUPinnedAllocation
:
public
Allocation
{
public:
CPUPinnedAllocation
(
void
*
ptr
,
size_t
size
)
:
Allocation
(
ptr
,
size
,
platform
::
CUDAPinnedPlace
())
{}
};
class
CPUPinnedAllocator
:
public
Allocator
{
public:
bool
IsAllocThreadSafe
()
const
override
;
protected:
void
Free
Impl
(
Allocation
*
allocation
)
override
;
void
Free
(
Allocation
*
allocation
)
override
;
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
};
...
...
paddle/fluid/memory/allocation/retry_allocator.cc
浏览文件 @
32146857
...
...
@@ -18,15 +18,25 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
void
RetryAllocator
::
FreeImpl
(
Allocation
*
allocation
)
{
bool
RetryAllocator
::
IsAllocThreadSafe
()
const
{
return
underlying_allocator_
->
IsAllocThreadSafe
();
}
void
RetryAllocator
::
Free
(
Allocation
*
allocation
)
{
// Delete underlying allocation first.
underlying_allocator_
->
Free
(
allocation
);
reinterpret_cast
<
AllocationWithUnderlying
*>
(
allocation
)
->
allocation_
.
reset
();
{
// notify all waited allocators, they can try to allocate memory after free.
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
cv_
.
notify_all
();
}
delete
allocation
;
}
Allocation
*
RetryAllocator
::
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
{
auto
alloc_func
=
[
&
,
this
]()
{
return
underlying_allocator_
->
Allocate
(
size
,
attr
).
release
();
return
new
AllocationWithUnderlying
(
underlying_allocator_
->
Allocate
(
size
,
attr
));
};
// In fact, we can unify the code of allocation success and failure
// But it would add lock even when allocation success at the first time
...
...
paddle/fluid/memory/allocation/retry_allocator.h
浏览文件 @
32146857
...
...
@@ -25,25 +25,32 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
class
RetryAllocator
;
class
RetryAllocator
:
public
Allocator
{
public:
RetryAllocator
(
std
::
shared_ptr
<
Allocator
>
allocator
,
size_t
retry_ms
)
RetryAllocator
(
std
::
unique_ptr
<
Allocator
>&&
allocator
,
size_t
retry_ms
)
:
underlying_allocator_
(
std
::
move
(
allocator
)),
retry_time_
(
retry_ms
)
{
EnforceCheck
();
}
bool
IsAllocThreadSafe
()
const
override
;
private:
void
EnforceCheck
()
{
PADDLE_ENFORCE_NOT_NULL
(
underlying_allocator_
,
"UnderlyingAllocator of RetryAllocator must
not be null
"
);
underlying_allocator_
.
get
()
,
"UnderlyingAllocator of RetryAllocator must
be UnmanagedAllocator
"
);
PADDLE_ENFORCE
(
underlying_allocator_
->
IsAllocThreadSafe
(),
"UnderlyingAllocator of RetryAllocator must be thread-safe"
);
}
bool
IsAllocThreadSafe
()
const
override
{
return
true
;
}
protected:
void
Free
Impl
(
Allocation
*
allocation
)
override
;
void
Free
(
Allocation
*
allocation
)
override
;
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
private:
std
::
shared
_ptr
<
Allocator
>
underlying_allocator_
;
std
::
unique
_ptr
<
Allocator
>
underlying_allocator_
;
std
::
chrono
::
milliseconds
retry_time_
;
std
::
mutex
mutex_
;
std
::
condition_variable
cv_
;
...
...
@@ -51,6 +58,8 @@ class RetryAllocator : public Allocator {
// For debug, We can add an atomic integer to record how many memory sizes are
// waited to allocate
// std::atomic<size_t> waited_allocate_size_{0};
friend
class
RetryAllocation
;
};
}
// namespace allocation
...
...
paddle/fluid/memory/allocation/zero_size_allocator.cc
浏览文件 @
32146857
...
...
@@ -24,20 +24,11 @@ bool ZeroSizeAllocator::IsAllocThreadSafe() const {
Allocation
*
ZeroSizeAllocator
::
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
{
if
(
size
==
0
)
{
return
new
Allocation
(
nullptr
,
0
,
place_
);
return
new
ZeroSizeAllocation
(
place_
);
}
else
{
return
underlying_allocator_
->
Allocate
(
size
,
attr
).
release
();
}
}
void
ZeroSizeAllocator
::
FreeImpl
(
Allocation
*
allocation
)
{
if
(
allocation
->
size
()
==
0
)
{
delete
allocation
;
}
else
{
underlying_allocator_
->
Free
(
allocation
);
}
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/zero_size_allocator.h
浏览文件 @
32146857
...
...
@@ -24,6 +24,12 @@ namespace allocation {
// The allocator handles the request's size is zero. Allocator will always
// return an allocation even the request size is zero. However, the
// allocation.ptr() is nullptr
class
ZeroSizeAllocation
:
public
Allocation
{
public:
explicit
ZeroSizeAllocation
(
const
platform
::
Place
&
p
)
:
Allocation
(
nullptr
,
0
,
p
)
{}
};
class
ZeroSizeAllocator
:
public
Allocator
{
public:
ZeroSizeAllocator
(
std
::
shared_ptr
<
Allocator
>
underlying_allocator
,
...
...
@@ -34,7 +40,6 @@ class ZeroSizeAllocator : public Allocator {
protected:
Allocation
*
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
override
;
void
FreeImpl
(
Allocation
*
allocation
)
override
;
private:
std
::
shared_ptr
<
Allocator
>
underlying_allocator_
;
...
...
paddle/fluid/operators/alloc_continuous_space_op.cc
浏览文件 @
32146857
...
...
@@ -65,7 +65,8 @@ class AllocContinuousSpaceKernel : public framework::OpKernel<T> {
// Get numel and dtype
size_t
numel
=
0
;
auto
dtype
=
kDefaultDtype
;
GetMemSizeAndDtype
(
in_tensors
,
in_var_names
,
&
numel
,
&
dtype
);
GetMemSizeAndDtype
(
in_tensors
,
in_var_names
,
&
numel
,
&
dtype
,
context
.
GetPlace
());
// Alloc the continuous space
auto
fused_tensor
=
context
.
Output
<
framework
::
LoDTensor
>
(
"FusedOutput"
);
...
...
@@ -74,14 +75,18 @@ class AllocContinuousSpaceKernel : public framework::OpKernel<T> {
// Init the continuous space
auto
out_tensors
=
context
.
MultiOutput
<
framework
::
LoDTensor
>
(
"Output"
);
int64_t
offset
=
0
;
size_t
offset
=
0
;
size_t
size_of_dtype
=
framework
::
SizeOfType
(
dtype
);
if
(
context
.
Attr
<
bool
>
(
"copy_data"
))
{
for
(
size_t
i
=
0
;
i
<
in_var_names
.
size
();
++
i
)
{
int64_t
len
=
out_tensors
[
i
]
->
numel
(
);
auto
sub_tensor
=
fused_tensor
->
Slice
(
offset
,
offset
+
len
);
offset
+=
len
;
framework
::
TensorCopy
(
*
out
_tensors
[
i
],
context
.
GetPlace
(),
dev_ctx
,
size_t
len
=
static_cast
<
size_t
>
(
in_tensors
[
i
]
->
numel
()
);
auto
sub_tensor
=
fused_tensor
->
Slice
(
static_cast
<
int64_t
>
(
offset
),
static_cast
<
int64_t
>
(
offset
+
len
))
;
framework
::
TensorCopy
(
*
in
_tensors
[
i
],
context
.
GetPlace
(),
dev_ctx
,
&
sub_tensor
);
offset
+=
Alignment
(
len
*
size_of_dtype
,
context
.
GetPlace
())
/
size_of_dtype
;
}
}
else
if
(
context
.
Attr
<
bool
>
(
"set_constant"
))
{
math
::
SetConstant
<
DeviceContext
,
T
>
set_constant
;
...
...
@@ -92,11 +97,13 @@ class AllocContinuousSpaceKernel : public framework::OpKernel<T> {
// Make the outputs point to the continuous space.
offset
=
0
;
for
(
size_t
i
=
0
;
i
<
out_tensors
.
size
();
++
i
)
{
int64_t
len
=
out_tensors
[
i
]
->
numel
(
);
size_t
len
=
static_cast
<
size_t
>
(
out_tensors
[
i
]
->
numel
()
);
auto
dim
=
out_tensors
[
i
]
->
dims
();
out_tensors
[
i
]
->
ShareDataWith
(
fused_tensor
->
Slice
(
offset
,
offset
+
len
))
->
ShareDataWith
(
fused_tensor
->
Slice
(
static_cast
<
int64_t
>
(
offset
),
static_cast
<
int64_t
>
(
offset
+
len
)))
.
Resize
(
dim
);
len
=
Alignment
(
len
*
size_of_dtype
,
context
.
GetPlace
())
/
size_of_dtype
;
offset
+=
len
;
VLOG
(
10
)
<<
"alloc_space_for_vars: output("
<<
out_var_names
[
i
]
<<
") ,dim:("
<<
dim
<<
")"
...
...
@@ -104,12 +111,28 @@ class AllocContinuousSpaceKernel : public framework::OpKernel<T> {
}
}
private:
// Note(zcd): Addresses should be aligned, otherwise, the results may have
// diff.
size_t
Alignment
(
size_t
size
,
const
platform
::
Place
&
place
)
const
{
// Allow to allocate the minimum chunk size is 4 KB.
size_t
alignment
=
1
<<
12
;
if
(
platform
::
is_gpu_place
(
place
))
{
// Allow to allocate the minimum chunk size is 256 B.
alignment
=
1
<<
8
;
}
size_t
remaining
=
size
%
alignment
;
return
remaining
==
0
?
size
:
size
+
(
alignment
-
remaining
);
}
void
GetMemSizeAndDtype
(
const
std
::
vector
<
const
framework
::
LoDTensor
*>
&
lod_tensors
,
const
std
::
vector
<
std
::
string
>
var_names
,
size_t
*
numel
,
framework
::
proto
::
VarType
::
Type
*
dtype
)
const
{
framework
::
proto
::
VarType
::
Type
*
dtype
,
const
platform
::
Place
&
place
)
const
{
PADDLE_ENFORCE_EQ
(
lod_tensors
.
size
(),
var_names
.
size
());
*
numel
=
0
;
size_t
size_of_dtype
=
0
;
for
(
size_t
i
=
0
;
i
<
var_names
.
size
();
++
i
)
{
PADDLE_ENFORCE
(
lod_tensors
[
i
]
->
IsInitialized
(),
"%s is not initialized."
,
var_names
[
i
]);
...
...
@@ -119,6 +142,7 @@ class AllocContinuousSpaceKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_NE
(
p_dtype
,
kDefaultDtype
,
"%s's type should not be %s."
,
var_names
[
i
],
kDefaultDtype
);
*
dtype
=
p_dtype
;
size_of_dtype
=
framework
::
SizeOfType
(
p_dtype
);
}
PADDLE_ENFORCE_EQ
(
p_dtype
,
*
dtype
,
"Input vars is not equal."
);
...
...
@@ -126,7 +150,8 @@ class AllocContinuousSpaceKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_GT
(
size
,
0
);
VLOG
(
10
)
<<
"alloc_space_for_vars: input("
<<
var_names
[
i
]
<<
") ,dim:("
<<
lod_tensors
[
i
]
->
dims
()
<<
")"
;
*
numel
+=
size
;
*
numel
+=
Alignment
(
static_cast
<
size_t
>
(
size
)
*
size_of_dtype
,
place
)
/
size_of_dtype
;
}
}
};
...
...
paddle/fluid/operators/bpr_loss_op.cc
浏览文件 @
32146857
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/bpr_loss_op.h"
#include <memory>
namespace
paddle
{
namespace
operators
{
...
...
@@ -127,6 +128,23 @@ neural networks>(https://arxiv.org/abs/1511.06939)
)DOC"
);
}
};
class
BprLossGradDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"bpr_loss_grad"
);
op
->
SetInput
(
"X"
,
Input
(
"X"
));
op
->
SetInput
(
"Label"
,
Input
(
"Label"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Y"
),
OutputGrad
(
"Y"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
}
// namespace operators
}
// namespace paddle
...
...
@@ -134,7 +152,7 @@ namespace ops = paddle::operators;
using
CPUCtx
=
paddle
::
platform
::
CPUDeviceContext
;
REGISTER_OPERATOR
(
bpr_loss
,
ops
::
BprLossOp
,
ops
::
BprLossOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
ops
::
BprLossGradDescMaker
);
REGISTER_OPERATOR
(
bpr_loss_grad
,
ops
::
BprLossGradientOp
);
REGISTER_OP_CPU_KERNEL
(
bpr_loss
,
ops
::
BprLossOpKernel
<
CPUCtx
,
float
>
,
ops
::
BprLossOpKernel
<
CPUCtx
,
double
>
);
...
...
paddle/fluid/operators/detection/roi_perspective_transform_op.cc
浏览文件 @
32146857
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include <algorithm>
#include <memory>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h"
...
...
@@ -568,13 +569,31 @@ class ROIPerspectiveTransformOpMaker
}
};
class
ROIPerspectiveTransformGradDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"roi_perspective_transform_grad"
);
op
->
SetInput
(
"X"
,
Input
(
"X"
));
op
->
SetInput
(
"ROIs"
,
Input
(
"ROIs"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
roi_perspective_transform
,
ops
::
ROIPerspectiveTransformOp
,
ops
::
ROIPerspectiveTransformOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
ops
::
ROIPerspectiveTransformGradDescMaker
);
REGISTER_OPERATOR
(
roi_perspective_transform_grad
,
ops
::
ROIPerspectiveTransformGradOp
);
REGISTER_OP_CPU_KERNEL
(
roi_perspective_transform
,
...
...
paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc
浏览文件 @
32146857
...
...
@@ -77,7 +77,8 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
}
else
{
functor
.
RunMidWise
(
n
,
pre
,
post
);
}
z
->
set_mkldnn_prim_desc
(
x
->
get_mkldnn_prim_desc
());
z
->
set_layout
(
DataLayout
::
kMKLDNN
);
z
->
set_format
(
x
->
format
());
}
else
{
PADDLE_ENFORCE
(
x
->
layout
()
==
DataLayout
::
kMKLDNN
&&
x
->
format
()
!=
memory
::
format
::
format_undef
,
...
...
@@ -115,8 +116,7 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
auto
sum_pd
=
sum
::
primitive_desc
(
dst_md
,
scales
,
srcs_pd
);
// create mkldnn memory for dst
auto
dst_mem_pd
=
sum_pd
.
dst_primitive_desc
();
memory
dst_memory
=
memory
(
dst_mem_pd
,
z_data
);
memory
dst_memory
=
memory
(
sum_pd
.
dst_primitive_desc
(),
z_data
);
std
::
vector
<
primitive
::
at
>
inputs
;
inputs
.
push_back
(
srcs
[
0
]);
...
...
@@ -129,7 +129,9 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
pipeline
.
push_back
(
sum_prim
);
stream
(
stream
::
kind
::
eager
).
submit
(
pipeline
).
wait
();
z
->
set_mkldnn_prim_desc
(
dst_mem_pd
);
z
->
set_layout
(
DataLayout
::
kMKLDNN
);
z
->
set_format
(
(
memory
::
format
)
dst_memory
.
get_primitive_desc
().
desc
().
data
.
format
);
}
}
};
...
...
@@ -150,19 +152,24 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> {
auto
*
out
=
dout
;
auto
*
x
=
dout
,
*
y
=
dout
;
auto
set_mkldnn_format
=
[](
Tensor
*
in
,
const
Tensor
*
out
)
{
in
->
set_layout
(
DataLayout
::
kMKLDNN
);
in
->
set_format
(
out
->
format
());
};
if
(
dx
!=
nullptr
&&
dy
!=
nullptr
&&
dx
->
dims
()
==
dy
->
dims
())
{
if
(
dx
->
dims
()
==
dy
->
dims
())
{
auto
blas
=
math
::
GetBlas
<
paddle
::
platform
::
CPUDeviceContext
,
T
>
(
ctx
);
if
(
dx
)
{
blas
.
VCOPY
(
dout
->
numel
(),
dout
->
data
<
T
>
(),
dx
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()));
dx
->
set_mkldnn_prim_desc
(
dout
->
get_mkldnn_prim_desc
()
);
set_mkldnn_format
(
dx
,
dout
);
}
if
(
dy
)
{
blas
.
VCOPY
(
dout
->
numel
(),
dout
->
data
<
T
>
(),
dy
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()));
dy
->
set_mkldnn_prim_desc
(
dout
->
get_mkldnn_prim_desc
()
);
set_mkldnn_format
(
dy
,
dout
);
}
}
}
else
{
...
...
paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
浏览文件 @
32146857
...
...
@@ -65,11 +65,17 @@ by input arguments.
}
};
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE
(
GaussianRandomBatchSizeLikeNoNeedBufferVarsInference
,
"Input"
);
}
// namespace operators
}
// namespace paddle
REGISTER_OP
_WITHOUT_GRADIENT
(
REGISTER_OP
ERATOR
(
gaussian_random_batch_size_like
,
paddle
::
operators
::
GaussianRandomBatchSizeLikeOp
,
paddle
::
operators
::
GaussianRandomBatchSizeLikeOpMaker
);
paddle
::
operators
::
GaussianRandomBatchSizeLikeOpMaker
,
paddle
::
framework
::
EmptyGradOpMaker
,
paddle
::
operators
::
GaussianRandomBatchSizeLikeNoNeedBufferVarsInference
);
// Kernels are registered in gaussian_random_op.cc and gaussian_random_op.cu
paddle/fluid/operators/im2sequence_op.cc
浏览文件 @
32146857
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/im2sequence_op.h"
#include <memory>
#include <string>
#include <vector>
...
...
@@ -146,12 +147,28 @@ class Im2SequenceGradOp : public framework::OperatorWithKernel {
}
};
class
Im2SequenceGradDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"im2sequence_grad"
);
op
->
SetInput
(
"X"
,
Input
(
"X"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
im2sequence
,
ops
::
Im2SequenceOp
,
ops
::
Im2SequenceOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
ops
::
Im2SequenceGradDescMaker
);
REGISTER_OPERATOR
(
im2sequence_grad
,
ops
::
Im2SequenceGradOp
);
REGISTER_OP_CPU_KERNEL
(
im2sequence
,
...
...
paddle/fluid/operators/interpolate_op.cc
浏览文件 @
32146857
...
...
@@ -10,6 +10,7 @@
limitations under the License. */
#include "paddle/fluid/operators/interpolate_op.h"
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
...
...
@@ -194,21 +195,46 @@ class InterpolateOpGrad : public framework::OperatorWithKernel {
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
(),
return
framework
::
OpKernelType
(
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
))
->
type
(),
ctx
.
GetPlace
());
}
};
class
InterpolateGradDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
ForwardOp
().
Type
()
+
"_grad"
);
op
->
SetInput
(
"X"
,
Input
(
"X"
));
if
(
ForwardOp
().
Inputs
().
count
(
"OutSize"
)
>
0
)
{
op
->
SetInput
(
"OutSize"
,
Input
(
"OutSize"
));
}
op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE
(
InterpolateGradNoNeedBufferVarsInference
,
"X"
);
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
bilinear_interp
,
ops
::
InterpolateOp
,
ops
::
InterpolateOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
REGISTER_OPERATOR
(
bilinear_interp_grad
,
ops
::
InterpolateOpGrad
);
ops
::
InterpolateGradDescMaker
);
REGISTER_OPERATOR
(
bilinear_interp_grad
,
ops
::
InterpolateOpGrad
,
ops
::
InterpolateGradNoNeedBufferVarsInference
);
REGISTER_OPERATOR
(
nearest_interp
,
ops
::
InterpolateOp
,
ops
::
InterpolateOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
REGISTER_OPERATOR
(
nearest_interp_grad
,
ops
::
InterpolateOpGrad
);
ops
::
InterpolateGradDescMaker
);
REGISTER_OPERATOR
(
nearest_interp_grad
,
ops
::
InterpolateOpGrad
,
ops
::
InterpolateGradNoNeedBufferVarsInference
);
REGISTER_OP_CPU_KERNEL
(
bilinear_interp
,
ops
::
InterpolateKernel
<
float
>
,
ops
::
InterpolateKernel
<
double
>
,
ops
::
InterpolateKernel
<
uint8_t
>
);
...
...
paddle/fluid/operators/l1_norm_op.cc
浏览文件 @
32146857
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/l1_norm_op.h"
#include <memory>
namespace
paddle
{
namespace
operators
{
...
...
@@ -62,12 +63,28 @@ $$Out = \sum{|X|}$$
}
};
class
L1NormGradDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"l1_norm_grad"
);
op
->
SetInput
(
"X"
,
Input
(
"X"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
l1_norm
,
ops
::
L1NormOp
,
ops
::
L1NormOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
ops
::
L1NormGradDescMaker
);
REGISTER_OPERATOR
(
l1_norm_grad
,
ops
::
L1NormGradOp
);
REGISTER_OP_CPU_KERNEL
(
l1_norm
,
ops
::
L1NormKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
);
...
...
paddle/fluid/operators/label_smooth_op.cc
浏览文件 @
32146857
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/label_smooth_op.h"
#include <memory>
#include <string>
namespace
paddle
{
...
...
@@ -105,10 +106,23 @@ class LabelSmoothGradOp : public framework::OperatorWithKernel {
:
OperatorWithKernel
(
type
,
inputs
,
outputs
,
attrs
)
{}
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) shouldn't be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"Out"
)),
"Input(Out@GRAD) shouldn't be null."
);
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
ctx
->
GetInputDim
(
"X"
));
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
ctx
->
GetInputDim
(
framework
::
GradVarName
(
"Out"
)));
}
};
class
LabelSmoothGradDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"label_smooth_grad"
);
op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
...
...
@@ -117,7 +131,7 @@ class LabelSmoothGradOp : public framework::OperatorWithKernel {
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
label_smooth
,
ops
::
LabelSmoothOp
,
ops
::
LabelSmoothOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
ops
::
LabelSmoothGradDescMaker
);
REGISTER_OPERATOR
(
label_smooth_grad
,
ops
::
LabelSmoothGradOp
);
REGISTER_OP_CPU_KERNEL
(
label_smooth
,
...
...
paddle/fluid/operators/linear_chain_crf_op.cc
浏览文件 @
32146857
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/linear_chain_crf_op.h"
#include <memory>
namespace
paddle
{
namespace
operators
{
...
...
@@ -250,14 +251,46 @@ class LinearChainCRFGradOp : public framework::OperatorWithKernel {
}
};
class
LinearChainCRFGradDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"linear_chain_crf_grad"
);
op
->
SetAttrMap
(
Attrs
());
op
->
SetInput
(
"Emission"
,
Input
(
"Emission"
));
op
->
SetInput
(
"Transition"
,
Input
(
"Transition"
));
op
->
SetInput
(
"Label"
,
Input
(
"Label"
));
op
->
SetInput
(
"Alpha"
,
Output
(
"Alpha"
));
op
->
SetInput
(
"EmissionExps"
,
Output
(
"EmissionExps"
));
op
->
SetInput
(
"TransitionExps"
,
Output
(
"TransitionExps"
));
op
->
SetInput
(
framework
::
GradVarName
(
"LogLikelihood"
),
OutputGrad
(
"LogLikelihood"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Emission"
),
InputGrad
(
"Emission"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Transition"
),
InputGrad
(
"Transition"
));
return
op
;
}
};
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE
(
LinearChainCRFGradNoNeedBufferVarsInference
,
"Transition"
,
"Emission"
);
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
linear_chain_crf
,
ops
::
LinearChainCRFOp
,
ops
::
LinearChainCRFOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
REGISTER_OPERATOR
(
linear_chain_crf_grad
,
ops
::
LinearChainCRFGradOp
);
ops
::
LinearChainCRFOpMaker
,
ops
::
LinearChainCRFGradDescMaker
);
REGISTER_OPERATOR
(
linear_chain_crf_grad
,
ops
::
LinearChainCRFGradOp
,
ops
::
LinearChainCRFGradNoNeedBufferVarsInference
);
REGISTER_OP_CPU_KERNEL
(
linear_chain_crf
,
ops
::
LinearChainCRFOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
...
...
paddle/fluid/operators/log_loss_op.cc
浏览文件 @
32146857
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/log_loss_op.h"
#include <memory>
namespace
paddle
{
namespace
operators
{
...
...
@@ -100,12 +101,29 @@ class LogLossGradOp : public framework::OperatorWithKernel {
}
};
class
LogLossGradDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"log_loss_grad"
);
op
->
SetInput
(
"Predicted"
,
Input
(
"Predicted"
));
op
->
SetInput
(
"Labels"
,
Input
(
"Labels"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Loss"
),
OutputGrad
(
"Loss"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Predicted"
),
InputGrad
(
"Predicted"
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
log_loss
,
ops
::
LogLossOp
,
ops
::
LogLossOpMaker
<
float
>
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
ops
::
LogLossGradDescMaker
);
REGISTER_OPERATOR
(
log_loss_grad
,
ops
::
LogLossGradOp
);
REGISTER_OP_CPU_KERNEL
(
log_loss
,
ops
::
LogLossKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
);
...
...
paddle/fluid/operators/lstm_op.cc
浏览文件 @
32146857
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/lstm_op.h"
#include <memory>
#include <string>
namespace
paddle
{
...
...
@@ -264,12 +265,51 @@ class LSTMGradOp : public framework::OperatorWithKernel {
}
};
class
LSTMGradOpDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"lstm_grad"
);
op
->
SetAttrMap
(
Attrs
());
op
->
SetInput
(
"Input"
,
Input
(
"Input"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Input"
),
InputGrad
(
"Input"
));
if
(
ForwardOp
().
Inputs
().
count
(
"H0"
)
>
0
)
{
op
->
SetInput
(
"H0"
,
Input
(
"H0"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"H0"
),
InputGrad
(
"H0"
));
}
if
(
ForwardOp
().
Inputs
().
count
(
"C0"
)
>
0
)
{
op
->
SetInput
(
"C0"
,
Input
(
"C0"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"C0"
),
InputGrad
(
"C0"
));
}
op
->
SetInput
(
"Weight"
,
Input
(
"Weight"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Weight"
),
InputGrad
(
"Weight"
));
op
->
SetInput
(
"Bias"
,
Input
(
"Bias"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Bias"
),
InputGrad
(
"Bias"
));
op
->
SetInput
(
"Cell"
,
Output
(
"Cell"
));
op
->
SetInput
(
"Hidden"
,
Output
(
"Hidden"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Hidden"
),
OutputGrad
(
"Hidden"
));
op
->
SetInput
(
"BatchGate"
,
Output
(
"BatchGate"
));
op
->
SetInput
(
"BatchCellPreAct"
,
Output
(
"BatchCellPreAct"
));
return
op
;
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
lstm
,
ops
::
LSTMOp
,
ops
::
LSTMOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
ops
::
LSTMGradOpDescMaker
);
REGISTER_OPERATOR
(
lstm_grad
,
ops
::
LSTMGradOp
);
REGISTER_OP_CPU_KERNEL
(
lstm
,
ops
::
LSTMKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
...
...
paddle/fluid/operators/margin_rank_loss_op.cc
浏览文件 @
32146857
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/margin_rank_loss_op.h"
#include <memory>
namespace
paddle
{
namespace
operators
{
...
...
@@ -94,8 +95,6 @@ class MarginRankLossGradOp : public framework::OperatorWithKernel {
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Label"
),
"Input(Label) shouldn't be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X1"
),
"Input(X1) shouldn't be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X2"
),
"Input(X2) shouldn't be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"Out"
)),
"Input(Out@GRAD) shouldn't be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Activated"
),
...
...
@@ -106,13 +105,31 @@ class MarginRankLossGradOp : public framework::OperatorWithKernel {
}
};
class
MarginRankLossGradDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"margin_rank_loss_grad"
);
op
->
SetInput
(
"Activated"
,
Output
(
"Activated"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
op
->
SetInput
(
"Label"
,
Input
(
"Label"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"X1"
),
InputGrad
(
"X1"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"X2"
),
InputGrad
(
"X2"
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
margin_rank_loss
,
ops
::
MarginRankLossOp
,
ops
::
MarginRankLossOpMaker
<
float
>
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
ops
::
MarginRankLossGradDescMaker
);
REGISTER_OPERATOR
(
margin_rank_loss_grad
,
ops
::
MarginRankLossGradOp
);
REGISTER_OP_CPU_KERNEL
(
margin_rank_loss
,
...
...
paddle/fluid/operators/mean_op.cc
浏览文件 @
32146857
...
...
@@ -13,7 +13,10 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/mean_op.h"
#include <memory>
#include <string>
#include <unordered_map>
namespace
paddle
{
namespace
operators
{
...
...
@@ -61,7 +64,8 @@ class MeanGradOp : public framework::OperatorWithKernel {
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
input_data_type
=
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
();
auto
input_data_type
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
))
->
type
();
return
framework
::
OpKernelType
(
input_data_type
,
ctx
.
GetPlace
());
}
};
...
...
@@ -81,13 +85,16 @@ class MeanGradMaker : public framework::SingleGradOpDescMaker {
}
};
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE
(
MeanGradNoNeedBufferVarsInference
,
"X"
);
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
mean
,
ops
::
MeanOp
,
ops
::
MeanOpMaker
,
ops
::
MeanOpInferVarType
,
ops
::
MeanGradMaker
);
REGISTER_OPERATOR
(
mean_grad
,
ops
::
MeanGradOp
);
REGISTER_OPERATOR
(
mean_grad
,
ops
::
MeanGradOp
,
ops
::
MeanGradNoNeedBufferVarsInference
);
REGISTER_OP_CPU_KERNEL
(
mean
,
ops
::
MeanKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
MeanKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
...
...
paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
浏览文件 @
32146857
...
...
@@ -96,7 +96,8 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
std
::
vector
<
int
>
src_tz
=
framework
::
vectorize2int
(
x
->
dims
());
auto
src_format
=
x
->
format
();
auto
src_format
=
src_tz
.
size
()
==
2
?
mkldnn
::
memory
::
format
::
nc
:
x
->
format
();
const
std
::
string
key
=
gethash
(
src_tz
,
algorithm
);
const
std
::
string
key_src_data
=
...
...
@@ -126,8 +127,10 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
if
(
p_fwd
==
nullptr
)
{
// create mkldnn memory for input X
auto
src_md
=
platform
::
MKLDNNMemDesc
(
src_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
src_format
);
auto
src_memory
=
std
::
shared_ptr
<
memory
>
(
new
memory
(
x
->
get_mkldnn_prim_desc
()
,
to_void_cast
(
x_data
)));
new
memory
(
{
src_md
,
mkldnn_engine
}
,
to_void_cast
(
x_data
)));
// save src_memory to be referred in backward path
dev_ctx
.
SetBlob
(
key_src_mem
,
src_memory
);
...
...
@@ -174,7 +177,8 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
pipeline
.
push_back
(
*
p_fwd
);
stream
(
stream
::
kind
::
eager
).
submit
(
pipeline
).
wait
();
y
->
set_mkldnn_prim_desc
(
dst_memory
->
get_primitive_desc
());
y
->
set_layout
(
DataLayout
::
kMKLDNN
);
y
->
set_format
(
GetMKLDNNFormat
(
*
dst_memory
));
}
template
<
typename
T
>
...
...
@@ -192,6 +196,9 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
std
::
vector
<
int
>
diff_dst_tz
=
framework
::
vectorize2int
(
diff_y
->
dims
());
auto
diff_y_format
=
diff_dst_tz
.
size
()
==
2
?
mkldnn
::
memory
::
format
::
nc
:
diff_y
->
format
();
const
std
::
string
key
=
gethash
(
diff_dst_tz
,
algorithm
);
const
std
::
string
key_src_data
=
key
+
ctx
.
op
().
Input
(
"Out"
)
+
"@eltwise_fwd_src_data"
;
...
...
@@ -203,8 +210,8 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
key
+
std
::
to_string
(
*
p_src_layout
)
+
"@eltwise_fwd_src_mem"
;
const
std
::
string
key_fwd_pd
=
key
+
std
::
to_string
(
*
p_src_layout
)
+
"@eltwise_fwd_pd"
;
const
std
::
string
key_with_layouts
=
key
+
std
::
to_string
(
*
p_src_layout
)
+
"-"
+
std
::
to_string
(
diff_y
->
format
()
);
const
std
::
string
key_with_layouts
=
key
+
std
::
to_string
(
*
p_src_layout
)
+
"-"
+
std
::
to_string
(
diff_y_format
);
const
std
::
string
key_diff_src_mem
=
key_with_layouts
+
"@eltwise_diff_src_mem"
;
const
std
::
string
key_diff_dst_mem
=
...
...
@@ -227,8 +234,10 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
if
(
p_grad
==
nullptr
)
{
// create mkldnn memory for input diff_y
auto
diff_dst_md
=
platform
::
MKLDNNMemDesc
(
diff_dst_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
diff_y_format
);
auto
diff_dst_memory
=
std
::
shared_ptr
<
memory
>
(
new
memory
(
diff_y
->
get_mkldnn_prim_desc
()
,
to_void_cast
(
diff_y_data
)));
new
memory
(
{
diff_dst_md
,
mkldnn_engine
}
,
to_void_cast
(
diff_y_data
)));
dev_ctx
.
SetBlob
(
key_diff_dst_mem
,
diff_dst_memory
);
// retrieve eltwise primitive desc from device context
...
...
@@ -272,7 +281,8 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
pipeline
.
push_back
(
*
p_grad
);
stream
(
stream
::
kind
::
eager
).
submit
(
pipeline
).
wait
();
diff_x
->
set_mkldnn_prim_desc
(
diff_src_memory
->
get_primitive_desc
());
diff_x
->
set_layout
(
DataLayout
::
kMKLDNN
);
diff_x
->
set_format
(
GetMKLDNNFormat
(
*
diff_src_memory
));
}
template
<
typename
T
,
mkldnn
::
algorithm
algorithm
>
...
...
paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc
浏览文件 @
32146857
...
...
@@ -206,14 +206,17 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
if
(
fuse_with_relu
)
flags
|=
mkldnn
::
fuse_bn_relu
;
// create mkldnn memory from input x tensor
mkldnn
::
memory
::
format
input_format
=
platform
::
MKLDNNFormatForSize
(
src_tz
.
size
(),
x
->
format
());
// keys for backward pass
const
std
::
string
key
=
BatchNormMKLDNNHandler
::
GetHash
(
src_tz
,
epsilon
,
flags
,
global_stats
,
x
->
format
()
,
src_tz
,
epsilon
,
flags
,
global_stats
,
input_format
,
ctx
.
op
().
Output
(
"SavedMean"
));
const
std
::
string
key_batch_norm_fwd_pd
=
key
+
"@bn_fwd_pd"
;
auto
user_src_md
=
x
->
get_mkldnn_prim_desc
().
desc
();
auto
user_src_md
=
platform
::
MKLDNNMemDesc
(
{
src_tz
},
platform
::
MKLDNNGetDataType
<
T
>
(),
input_format
);
// create primitive descriptor for batch norm forward
using
bn_fwd_types
=
bn_type_traits
<
mkldnn
::
batch_normalization_forward
>
;
...
...
@@ -227,8 +230,8 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
BatchNormMKLDNNHandler
handler
(
batch_norm_fwd_pd
,
dev_ctx
,
mkldnn_engine
,
key
);
auto
src_memory
=
handler
.
AcquireSrcMemory
(
x
->
get_mkldnn_prim_desc
(),
to_void_cast
(
x_data
));
auto
src_memory
=
handler
.
AcquireSrcMemory
(
user_src_md
,
to_void_cast
(
x_data
));
// crate mkldnn memory for weights(scale/shift)
auto
scaleshift_memory
=
...
...
@@ -262,7 +265,8 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
variance_memory
,
false
);
}
y
->
set_mkldnn_prim_desc
(
dst_memory
->
get_primitive_desc
());
y
->
set_layout
(
DataLayout
::
kMKLDNN
);
y
->
set_format
(
platform
::
GetMKLDNNFormat
(
*
dst_memory
));
std
::
vector
<
mkldnn
::
primitive
>
pipeline
;
pipeline
.
push_back
(
*
batch_norm_p
);
...
...
@@ -332,6 +336,9 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
using
bn_bwd_types
=
bn_type_traits
<
mkldnn
::
batch_normalization_backward
>
;
mkldnn
::
memory
::
format
dst_format
=
platform
::
MKLDNNFormatForSize
(
src_tz
.
size
(),
diff_y
->
format
());
mkldnn
::
memory
::
format
input_format
=
platform
::
MKLDNNFormatForSize
(
src_tz
.
size
(),
x
->
format
());
...
...
@@ -339,14 +346,14 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
// keys from forward pass
const
std
::
string
key
=
BatchNormMKLDNNHandler
::
GetHash
(
src_tz
,
epsilon
,
flags
,
false
,
x
->
format
()
,
src_tz
,
epsilon
,
flags
,
false
,
input_format
,
ctx
.
op
().
Input
(
"SavedMean"
));
const
std
::
string
key_batch_norm_fwd_pd
=
key
+
"@bn_fwd_pd"
;
// keys for primitives reuse
const
std
::
string
key_with_hash
=
key
+
BatchNormMKLDNNHandler
::
GetHash
(
src_tz
,
epsilon
,
flags
,
false
,
x
->
format
()
);
input_format
);
const
std
::
string
key_batch_norm_bwd_p
=
key_with_hash
+
"@batch_norm_bwd_p"
;
const
std
::
string
key_batch_norm_src_mem_p
=
...
...
@@ -366,8 +373,9 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
primitive
reorder_diff_dst
;
bool
is_diff_dst_reordered
=
false
;
auto
user_diff_dst_memory
=
memory
(
diff_y
->
get_mkldnn_prim_desc
(),
to_void_cast
(
diff_y_data
));
auto
user_diff_dst_memory
=
memory
(
{{{
diff_dst_tz
},
memory
::
data_type
::
f32
,
dst_format
},
mkldnn_engine
},
to_void_cast
(
diff_y_data
));
// MKLDNN requires a single piece of memory for scale and shift/bias data
const
size_t
scaleshift_size
=
2
*
ic
;
...
...
@@ -451,7 +459,10 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
dev_ctx
.
SetBlob
(
key_batch_norm_diff_dst_mem_p
,
diff_dst_memory
);
// set layout/format of output tensors
diff_x
->
set_mkldnn_prim_desc
(
diff_src_memory
->
get_primitive_desc
());
diff_x
->
set_layout
(
DataLayout
::
kMKLDNN
);
diff_x
->
set_format
((
memory
::
format
)
diff_src_memory
->
get_primitive_desc
()
.
desc
()
.
data
.
format
);
}
else
{
// primitives already exist
UpdateMemoryData
(
dev_ctx
,
key_batch_norm_src_mem_p
,
to_void_cast
(
x_data
));
...
...
@@ -476,7 +487,10 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
}
// set layout/format of output tensors
diff_x
->
set_mkldnn_prim_desc
(
diff_src_memory
->
get_primitive_desc
());
diff_x
->
set_layout
(
DataLayout
::
kMKLDNN
);
diff_x
->
set_format
((
memory
::
format
)
diff_src_memory
->
get_primitive_desc
()
.
desc
()
.
data
.
format
);
}
// execute optional reorder and batch_norm backward primitive
...
...
paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
浏览文件 @
32146857
...
...
@@ -210,7 +210,8 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
stream
(
stream
::
kind
::
eager
).
submit
({
*
concat_p
}).
wait
();
output
->
set_mkldnn_prim_desc
(
concat_pd
->
dst_primitive_desc
());
output
->
set_layout
(
DataLayout
::
kMKLDNN
);
output
->
set_format
(
GetDstMemFormat
(
*
concat_pd
));
}
};
}
// namespace operators
...
...
paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
浏览文件 @
32146857
...
...
@@ -96,8 +96,12 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto
*
bias
=
ctx
.
HasInput
(
"Bias"
)
?
ctx
.
Input
<
Tensor
>
(
"Bias"
)
:
nullptr
;
auto
*
output
=
ctx
.
Output
<
Tensor
>
(
"Output"
);
PADDLE_ENFORCE
(
input
->
layout
()
==
DataLayout
::
kMKLDNN
);
PADDLE_ENFORCE
(
filter
->
layout
()
==
DataLayout
::
kMKLDNN
);
PADDLE_ENFORCE
(
input
->
layout
()
==
DataLayout
::
kMKLDNN
&&
input
->
format
()
!=
memory
::
format
::
format_undef
,
"Wrong layout/format set for Input tensor"
);
PADDLE_ENFORCE
(
filter
->
layout
()
==
DataLayout
::
kMKLDNN
&&
filter
->
format
()
!=
memory
::
format
::
format_undef
,
"Wrong layout/format set for Filter tensor"
);
PADDLE_ENFORCE
(
input
->
dims
().
size
()
==
4
||
input
->
dims
().
size
()
==
5
,
"Input must be with 4 or 5 dimensions, i.e. NCHW or NCDHW"
);
PADDLE_ENFORCE
(
filter
->
dims
().
size
()
==
4
||
filter
->
dims
().
size
()
==
5
,
...
...
@@ -144,19 +148,14 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
std
::
vector
<
primitive
>
pipeline
;
// For convolution with groups we need to recreate primitive descriptor
// as Paddle tensor is not having group dims while mkldnn treats
// group as another dimensions
mkldnn
::
memory
::
primitive_desc
user_weights_mpd
=
filter
->
get_mkldnn_prim_desc
();
if
(
g
>
1
)
{
auto
src_format
=
input
->
format
();
mkldnn
::
memory
::
format
weights_format
=
GetWeightsFormat
(
filter
->
format
(),
g
,
is_conv3d
);
auto
user_src_md
=
platform
::
MKLDNNMemDesc
(
{
src_tz
},
platform
::
MKLDNNGetDataType
<
T
>
(),
src_format
);
auto
user_weights_md
=
platform
::
MKLDNNMemDesc
(
{
weights_tz
},
platform
::
MKLDNNGetDataType
<
T
>
(),
weights_format
);
user_weights_mpd
=
mkldnn
::
memory
::
primitive_desc
(
user_weights_md
,
mkldnn_engine
);
}
/* create memory descriptor for convolution without specified format
* ('any') which lets a primitive (convolution in this case) choose
...
...
@@ -166,7 +165,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto
chosen_memory_format
=
platform
::
data_format_to_memory_format
(
data_format
);
mkldnn
::
memory
::
format
weights_format
=
mkldnn
::
memory
::
format
::
any
;
weights_format
=
mkldnn
::
memory
::
format
::
any
;
// Check the format for user's special output
if
(
chosen_memory_format
!=
mkldnn
::
memory
::
format
::
any
)
{
if
(
is_conv3d
)
{
...
...
@@ -206,10 +205,10 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
platform
::
ConvMKLDNNHandler
handler
(
conv_pd
,
dev_ctx
,
mkldnn_engine
,
key
);
// create mkldnn memory from input tensors (data/weights)
auto
user_src_memory_p
=
handler
.
AcquireSrcMemory
(
input
->
get_mkldnn_prim_desc
()
,
to_void_cast
<
T
>
(
input_data
));
auto
user_src_memory_p
=
handler
.
AcquireSrcMemory
(
user_src_md
,
to_void_cast
<
T
>
(
input_data
));
auto
user_weights_memory_p
=
handler
.
AcquireWeightsMemory
(
user_weights_m
p
d
,
to_void_cast
<
T
>
(
filter_data
));
user_weights_md
,
to_void_cast
<
T
>
(
filter_data
));
// create reorder primitive if the input format is not the preferred one
auto
src_memory_p
=
...
...
@@ -282,7 +281,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
pipeline
.
push_back
(
*
conv_p
);
stream
(
stream
::
kind
::
eager
).
submit
(
pipeline
).
wait
();
output
->
set_mkldnn_prim_desc
(
dst_memory_p
->
get_primitive_desc
());
output
->
set_layout
(
DataLayout
::
kMKLDNN
);
output
->
set_format
(
GetMKLDNNFormat
(
*
dst_memory_p
));
}
void
ComputeINT8
(
const
paddle
::
framework
::
ExecutionContext
&
ctx
)
const
{
const
bool
is_test
=
ctx
.
Attr
<
bool
>
(
"is_test"
);
...
...
@@ -948,8 +948,8 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
// push primitive to stream and wait until it's executed
pipeline
.
push_back
(
*
conv_bwd_weights_p
);
auto
filter_grad_mpd
=
diff_weights_memory_p
->
get_primitive_desc
(
);
filter_grad
->
set_
mkldnn_prim_desc
(
filter_grad_mpd
);
filter_grad
->
set_layout
(
DataLayout
::
kMKLDNN
);
filter_grad
->
set_
format
(
GetMKLDNNFormat
(
*
diff_weights_memory_p
)
);
}
if
(
input_grad
)
{
...
...
@@ -972,7 +972,8 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
pipeline
.
push_back
(
*
conv_bwd_data_p
);
input_grad
->
set_mkldnn_prim_desc
(
diff_src_memory_p
->
get_primitive_desc
());
input_grad
->
set_layout
(
DataLayout
::
kMKLDNN
);
input_grad
->
set_format
(
GetMKLDNNFormat
(
*
diff_src_memory_p
));
}
stream
(
stream
::
kind
::
eager
).
submit
(
pipeline
).
wait
();
}
...
...
paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc
浏览文件 @
32146857
...
...
@@ -221,7 +221,8 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
pipeline
.
push_back
(
*
conv_p
);
mkldnn
::
stream
(
mkldnn
::
stream
::
kind
::
eager
).
submit
(
pipeline
).
wait
();
output
->
set_mkldnn_prim_desc
(
dst_memory_p
->
get_primitive_desc
());
output
->
set_layout
(
DataLayout
::
kMKLDNN
);
output
->
set_format
(
platform
::
GetMKLDNNFormat
(
*
dst_memory_p
));
}
private:
...
...
paddle/fluid/operators/mkldnn/gaussian_random_mkldnn_op.cc
浏览文件 @
32146857
...
...
@@ -42,12 +42,8 @@ class GaussianMKLDNNKernel : public paddle::framework::OpKernel<T> {
// The format of output is set as the mkldnn's format
// TODO(@mozga-intel) The format of matrix sets inside the another layers.
// TODO(jczaja): Remove this hack after checking performance on block layout
auto
tensor_mem_pd
=
paddle
::
platform
::
create_prim_desc_from_dims
(
paddle
::
framework
::
vectorize2int
(
tensor
->
dims
()),
mkldnn
::
memory
::
format
::
oihw
);
tensor
->
set_mkldnn_prim_desc
(
tensor_mem_pd
);
tensor
->
set_layout
(
DataLayout
::
kMKLDNN
);
tensor
->
set_format
(
mkldnn
::
memory
::
format
::
oihw
);
}
};
}
// namespace operators
...
...
paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc
浏览文件 @
32146857
...
...
@@ -81,7 +81,10 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto
e_mid
=
framework
::
EigenTensor
<
T
,
4
>::
From
(
*
mid
);
e_mid
=
e_mid
.
constant
(
k
);
auto
src_md
=
x
->
get_mkldnn_prim_desc
().
desc
();
auto
dims
=
paddle
::
framework
::
vectorize2int
(
x
->
dims
());
auto
src_md
=
paddle
::
platform
::
MKLDNNMemDesc
(
dims
,
mkldnn
::
memory
::
data_type
::
f32
,
x
->
format
());
auto
forward_desc
=
mkldnn
::
lrn_forward
::
desc
{
mkldnn
::
prop_kind
::
forward
,
mkldnn
::
lrn_across_channels
,
...
...
@@ -91,7 +94,7 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
beta
,
k
};
auto
src_memory_pd
=
x
->
get_mkldnn_prim_desc
()
;
auto
src_memory_pd
=
mkldnn
::
memory
::
primitive_desc
{
src_md
,
mkldnn_engine
}
;
if
(
!
is_test
)
{
const
std
::
string
key
=
ctx
.
op
().
Output
(
"Out"
);
...
...
@@ -108,15 +111,16 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
src_memory
->
set_data_handle
(
static_cast
<
void
*>
(
const_cast
<
T
*>
(
input_data
)));
auto
dst_memory_pd
=
forward_pd
->
dst_primitive_desc
();
auto
dst_memory
=
mkldnn
::
memory
(
dst_memory_pd
,
static_cast
<
void
*>
(
output_data
));
auto
dst_memory
=
mkldnn
::
memory
(
forward_pd
->
dst_primitive_desc
(),
static_cast
<
void
*>
(
output_data
));
auto
workspace_memory
=
insert_to_context
<
mkldnn
::
memory
>
(
key_workspace_memory
,
dev_ctx
,
forward_pd
->
workspace_primitive_desc
());
run_primitive
(
*
forward_pd
,
*
src_memory
,
*
workspace_memory
,
dst_memory
);
out
->
set_mkldnn_prim_desc
(
dst_memory_pd
);
out
->
set_layout
(
framework
::
DataLayout
::
kMKLDNN
);
out
->
set_format
(
platform
::
GetMKLDNNFormat
(
dst_memory
));
}
else
{
auto
forward_pd
=
mkldnn
::
lrn_forward
::
primitive_desc
{
forward_desc
,
mkldnn_engine
};
...
...
@@ -124,12 +128,13 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
src_memory_pd
,
static_cast
<
void
*>
(
const_cast
<
T
*>
(
input_data
))};
auto
workspace_memory
=
mkldnn
::
memory
{
forward_pd
.
workspace_primitive_desc
()};
auto
dst_memory_pd
=
forward_pd
.
dst_primitive_desc
();
auto
dst_memory
=
mkldnn
::
memory
(
forward_pd
.
dst_primitive_desc
(),
static_cast
<
void
*>
(
output_data
));
run_primitive
(
forward_pd
,
src_memory
,
workspace_memory
,
dst_memory
);
out
->
set_mkldnn_prim_desc
(
dst_memory_pd
);
out
->
set_layout
(
framework
::
DataLayout
::
kMKLDNN
);
out
->
set_format
(
platform
::
GetMKLDNNFormat
(
dst_memory
));
}
}
};
...
...
paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
浏览文件 @
32146857
...
...
@@ -158,14 +158,6 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
auto
softmax_p
=
handler
.
AcquireSoftmax
(
softmax_dst_memory_p
,
softmax_src_memory_p
);
// We cannot use softmax_dst_memory_p to get prim desc as
// it contains flattened dims (2D) while output tensor can
// have 2,3,4+ dims
auto
output_mem_pd
=
paddle
::
platform
::
create_prim_desc_from_dims
(
paddle
::
framework
::
vectorize2int
(
output
->
dims
()),
mkldnn
::
memory
::
format
::
blocked
);
output
->
set_mkldnn_prim_desc
(
output_mem_pd
);
std
::
vector
<
primitive
>
pipeline
{
*
(
static_cast
<
softmax_forward
::
primitive
*>
(
softmax_p
.
get
()))};
stream
(
stream
::
kind
::
eager
).
submit
(
pipeline
).
wait
();
...
...
paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
浏览文件 @
32146857
...
...
@@ -106,12 +106,12 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
memory
::
desc
(
dst_tz
,
memory
::
data_type
::
f32
,
memory
::
format
::
any
);
auto
sum_pd
=
sum
::
primitive_desc
(
dst_md
,
scales
,
srcs_mpd
);
auto
dst_mem_pd
=
sum_pd
.
dst_primitive_desc
();
std
::
shared_ptr
<
memory
>
dst_mem
;
if
(
in_place
)
{
dst_mem
.
reset
(
new
memory
(
dst_mem_pd
));
dst_mem
.
reset
(
new
memory
(
sum_pd
.
dst_primitive_desc
()
));
}
else
{
dst_mem
.
reset
(
new
memory
(
dst_mem_pd
,
output_data
));
dst_mem
.
reset
(
new
memory
(
sum_pd
.
dst_primitive_desc
()
,
output_data
));
}
std
::
vector
<
mkldnn
::
primitive
::
at
>
inputs
;
for
(
size_t
i
=
0
;
i
<
srcs_mem
.
size
();
++
i
)
{
...
...
@@ -136,7 +136,8 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
if
(
in_place
)
pipeline
.
push_back
(
reorder_prim
);
stream
(
stream
::
kind
::
eager
).
submit
(
pipeline
).
wait
();
output
->
set_mkldnn_prim_desc
(
dst_mem_pd
);
output
->
set_layout
(
DataLayout
::
kMKLDNN
);
output
->
set_format
(
output_format
);
}
else
{
// Fallback to naive version
// TODO(@mozga-intel) Add MKLDNN SelectedRows & LoDTensorArray support
SumKernel
<
CPUDeviceContext
,
T
>
reference_kernel
;
...
...
paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc
浏览文件 @
32146857
...
...
@@ -52,7 +52,7 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
mkldnn_engine
,
key
);
auto
transpose_src_memory_p
=
handler
.
AcquireSrcMemory
(
input
->
get_mkldnn_prim_desc
(),
platform
::
to_void_cast
<
T
>
(
input_data
));
input
->
format
(),
platform
::
to_void_cast
<
T
>
(
input_data
));
auto
transpose_dst_memory_p
=
handler
.
AcquireDstMemory
(
output
,
ctx
.
GetPlace
());
auto
transpose_p
=
handler
.
AcquireTranspose
(
transpose_dst_memory_p
,
...
...
@@ -62,14 +62,8 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
pipeline
.
push_back
(
*
transpose_p
);
mkldnn
::
stream
(
mkldnn
::
stream
::
kind
::
eager
).
submit
(
pipeline
).
wait
();
// Transpose did change logical dimensions of Tensor, but reorder does not.
// Reorder does change only physical layout eg. format , strides
// so we need to create new primitive descriptor with changed logical layout
// so it match output shape
auto
output_mem_pd
=
paddle
::
platform
::
create_prim_desc_from_dims
(
paddle
::
framework
::
vectorize2int
(
output
->
dims
()),
mkldnn
::
memory
::
format
::
blocked
);
output
->
set_mkldnn_prim_desc
(
output_mem_pd
);
output
->
set_layout
(
DataLayout
::
kNCHW
);
output
->
set_format
(
mkldnn
::
memory
::
format
::
format_undef
);
}
};
...
...
@@ -134,9 +128,8 @@ class TransposeMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
platform
::
TransposeMKLDNNHandler
handler
(
nchw_tz
,
reversed_axis
,
dev_ctx
,
mkldnn_engine
,
key
);
auto
transpose_src_memory_p
=
handler
.
AcquireSrcMemory
(
out_grad
->
get_mkldnn_prim_desc
(),
platform
::
to_void_cast
<
T
>
(
out_grad_data
));
auto
transpose_src_memory_p
=
handler
.
AcquireSrcMemory
(
out_grad
->
format
(),
platform
::
to_void_cast
<
T
>
(
out_grad_data
));
auto
transpose_dst_memory_p
=
handler
.
AcquireDstMemory
(
x_grad
,
ctx
.
GetPlace
());
auto
transpose_p
=
handler
.
AcquireTranspose
(
transpose_dst_memory_p
,
...
...
@@ -145,15 +138,6 @@ class TransposeMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
std
::
vector
<
mkldnn
::
primitive
>
pipeline
;
pipeline
.
push_back
(
*
transpose_p
);
mkldnn
::
stream
(
mkldnn
::
stream
::
kind
::
eager
).
submit
(
pipeline
).
wait
();
// Transpose did change logical dimensions of Tensor, but reorder does not.
// Reorder does change only physical layout eg. format , strides
// so we need to create new primitive descriptor with changed logical layout
// so it match output shape
auto
x_grad_mem_pd
=
paddle
::
platform
::
create_prim_desc_from_dims
(
paddle
::
framework
::
vectorize2int
(
x_grad
->
dims
()),
mkldnn
::
memory
::
format
::
blocked
);
x_grad
->
set_mkldnn_prim_desc
(
x_grad_mem_pd
);
}
};
...
...
paddle/fluid/operators/multiplex_op.cc
浏览文件 @
32146857
...
...
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/multiplex_op.h"
#include <memory>
#include <vector>
namespace
paddle
{
namespace
operators
{
...
...
@@ -111,28 +113,47 @@ class MultiplexGradOp : public framework::OperatorWithKernel {
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
!
ctx
->
Inputs
(
"X"
).
empty
(),
"Input(X) should not be null."
);
PADDLE_ENFORCE
(
!
ctx
->
Outputs
(
framework
::
GradVarName
(
"X"
)).
empty
(),
"Output(X@Grad) should not be null."
);
auto
&
dxs
=
ctx
->
Outputs
(
framework
::
GradVarName
(
"X"
));
PADDLE_ENFORCE
(
!
dxs
.
empty
(),
"Output(X@Grad) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"Out"
)),
"Input(Out@GRAD) should not be null."
);
ctx
->
SetOutputsDim
(
framework
::
GradVarName
(
"X"
),
ctx
->
GetInputsDim
(
"X"
));
auto
dout_dim
=
ctx
->
GetInputDim
(
framework
::
GradVarName
(
"Out"
));
ctx
->
SetOutputsDim
(
framework
::
GradVarName
(
"X"
),
std
::
vector
<
framework
::
DDim
>
(
dxs
.
size
(),
dout_dim
));
}
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
ctx
.
MultiInput
<
Tensor
>
(
"X"
)[
0
]
->
type
(),
return
framework
::
OpKernelType
(
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
))
->
type
(),
ctx
.
device_context
());
}
};
class
MultiplexGradDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"multiplex_grad"
);
op
->
SetInput
(
"Ids"
,
Input
(
"Ids"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
,
false
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
multiplex
,
ops
::
MultiplexOp
,
ops
::
MultiplexOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
false
>
);
ops
::
MultiplexGradDescMaker
);
REGISTER_OPERATOR
(
multiplex_grad
,
ops
::
MultiplexGradOp
);
REGISTER_OP_CPU_KERNEL
(
multiplex
,
...
...
paddle/fluid/operators/multiplex_op.cu
浏览文件 @
32146857
...
...
@@ -53,20 +53,25 @@ class MultiplexGradGPUKernel : public framework::OpKernel<T> {
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
auto
*
d_out
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
ins
=
ctx
.
MultiInput
<
Tensor
>
(
"X"
);
auto
*
ids
=
ctx
.
Input
<
Tensor
>
(
"Ids"
);
auto
d_ins
=
ctx
.
MultiOutput
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
size_t
idx
=
-
1UL
;
for
(
size_t
i
=
0
;
i
<
d_ins
.
size
();
i
++
)
{
if
(
d_ins
[
i
])
{
d_ins
[
i
]
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
t
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
d_ins
[
i
]);
t
.
device
(
*
ctx
.
template
device_context
<
Place
>().
eigen_device
())
=
t
.
constant
(
static_cast
<
T
>
(
0
));
idx
=
i
;
}
}
auto
rows
=
ins
[
0
]
->
dims
()[
0
];
auto
cols
=
ins
[
0
]
->
numel
()
/
rows
;
if
(
idx
==
-
1UL
)
return
;
auto
rows
=
d_ins
[
idx
]
->
dims
()[
0
];
auto
cols
=
d_ins
[
idx
]
->
numel
()
/
rows
;
// copy index to cpu
Tensor
index_t_cpu
;
TensorCopySync
(
*
ids
,
platform
::
CPUPlace
(),
&
index_t_cpu
);
...
...
paddle/fluid/operators/multiplex_op.h
浏览文件 @
32146857
...
...
@@ -52,20 +52,25 @@ class MultiplexGradCPUKernel : public framework::OpKernel<T> {
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
auto
*
d_out
=
ctx
.
Input
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
ids
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Ids"
);
auto
ins
=
ctx
.
MultiInput
<
framework
::
Tensor
>
(
"X"
);
auto
d_ins
=
ctx
.
MultiOutput
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"X"
));
size_t
idx
=
-
1UL
;
for
(
size_t
i
=
0
;
i
<
d_ins
.
size
();
i
++
)
{
if
(
d_ins
[
i
])
{
d_ins
[
i
]
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
t
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
d_ins
[
i
]);
t
.
device
(
*
ctx
.
template
device_context
<
DeviceContext
>().
eigen_device
())
=
t
.
constant
(
static_cast
<
T
>
(
0
));
idx
=
i
;
}
}
auto
rows
=
ins
[
0
]
->
dims
()[
0
];
auto
cols
=
ins
[
0
]
->
numel
()
/
rows
;
if
(
idx
==
-
1UL
)
return
;
auto
rows
=
d_ins
[
idx
]
->
dims
()[
0
];
auto
cols
=
d_ins
[
idx
]
->
numel
()
/
rows
;
auto
*
index
=
ids
->
data
<
int32_t
>
();
platform
::
CPUPlace
place
=
boost
::
get
<
platform
::
CPUPlace
>
(
ctx
.
GetPlace
());
for
(
auto
i
=
0
;
i
<
rows
;
i
++
)
{
...
...
paddle/fluid/operators/pad_op.cc
浏览文件 @
32146857
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/pad_op.h"
#include <memory>
namespace
paddle
{
namespace
operators
{
...
...
@@ -29,7 +30,7 @@ class PadOp : public framework::OperatorWithKernel {
"Output(Out) of PadOp should not be null."
);
auto
x_dim
=
ctx
->
GetInputDim
(
"X"
);
auto
paddings
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"paddings"
);
auto
&
paddings
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"paddings"
);
PADDLE_ENFORCE_EQ
(
x_dim
.
size
()
*
2
,
int64_t
(
paddings
.
size
()),
"Size of paddings should be equal to 2 * dimension size "
"of input tensor."
);
...
...
@@ -99,13 +100,20 @@ class PadOpGrad : public framework::OperatorWithKernel {
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) should not be null"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"Out"
)),
"Input(Out@GRAD) should not be null"
);
auto
x_dims
=
ctx
->
GetInputDim
(
"X"
);
auto
dout_dims
=
ctx
->
GetInputDim
(
framework
::
GradVarName
(
"Out"
));
auto
&
paddings
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"paddings"
);
for
(
int
i
=
0
;
i
<
dout_dims
.
size
();
++
i
)
{
dout_dims
[
i
]
-=
(
paddings
[
i
*
2
]
+
paddings
[
i
*
2
+
1
]);
}
auto
x_grad_name
=
framework
::
GradVarName
(
"X"
);
if
(
ctx
->
HasOutput
(
x_grad_name
))
{
ctx
->
SetOutputDim
(
x_grad_name
,
x_dims
);
auto
dout_dims
=
ctx
->
GetInputDim
(
framework
::
GradVarName
(
"Out"
));
auto
&
paddings
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"paddings"
);
for
(
int
i
=
0
;
i
<
dout_dims
.
size
();
++
i
)
{
dout_dims
[
i
]
-=
(
paddings
[
i
*
2
]
+
paddings
[
i
*
2
+
1
]);
}
ctx
->
SetOutputDim
(
x_grad_name
,
dout_dims
);
}
}
};
...
...
@@ -117,7 +125,6 @@ class PadOpGradMaker : public framework::SingleGradOpDescMaker {
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
auto
*
bind
=
new
framework
::
OpDesc
();
bind
->
SetInput
(
"X"
,
Input
(
"X"
));
bind
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
bind
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
bind
->
SetAttrMap
(
Attrs
());
...
...
paddle/fluid/operators/psroi_pool_op.cc
浏览文件 @
32146857
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/psroi_pool_op.h"
#include <memory>
namespace
paddle
{
namespace
operators
{
...
...
@@ -154,12 +155,29 @@ class PSROIPoolGradOp : public framework::OperatorWithKernel {
}
};
class
PSROIPoolGradDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"psroi_pool_grad"
);
op
->
SetInput
(
"X"
,
Input
(
"X"
));
op
->
SetInput
(
"ROIs"
,
Input
(
"ROIs"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
psroi_pool
,
ops
::
PSROIPoolOp
,
ops
::
PSROIPoolOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
ops
::
PSROIPoolGradDescMaker
);
REGISTER_OPERATOR
(
psroi_pool_grad
,
ops
::
PSROIPoolGradOp
);
REGISTER_OP_CPU_KERNEL
(
psroi_pool
,
...
...
paddle/fluid/operators/rank_loss_op.cc
浏览文件 @
32146857
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/rank_loss_op.h"
#include <memory>
#include <string>
namespace
paddle
{
...
...
@@ -116,6 +117,25 @@ class RankLossGradOp : public framework::OperatorWithKernel {
}
};
class
RankLossGradDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"rank_loss_grad"
);
op
->
SetInput
(
"Label"
,
Input
(
"Label"
));
op
->
SetInput
(
"Left"
,
Input
(
"Left"
));
op
->
SetInput
(
"Right"
,
Input
(
"Right"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Left"
),
InputGrad
(
"Left"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Right"
),
InputGrad
(
"Right"
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/roi_align_op.cc
浏览文件 @
32146857
...
...
@@ -10,6 +10,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/roi_align_op.h"
#include <memory>
namespace
paddle
{
namespace
operators
{
...
...
@@ -147,12 +148,29 @@ Thus avoid the misaligned problem.
}
};
class
ROIAlignGradDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"roi_align_grad"
);
op
->
SetInput
(
"X"
,
Input
(
"X"
));
op
->
SetInput
(
"ROIs"
,
Input
(
"ROIs"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
roi_align
,
ops
::
ROIAlignOp
,
ops
::
ROIAlignOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
ops
::
ROIAlignGradDescMaker
);
REGISTER_OPERATOR
(
roi_align_grad
,
ops
::
ROIAlignGradOp
);
REGISTER_OP_CPU_KERNEL
(
roi_align
,
...
...
paddle/fluid/operators/roi_pool_op.cc
浏览文件 @
32146857
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/roi_pool_op.h"
#include <memory>
namespace
paddle
{
namespace
operators
{
...
...
@@ -158,12 +159,30 @@ https://stackoverflow.com/questions/43430056/what-is-roi-layer-in-fast-rcnn
}
};
class
ROIPoolGradDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"roi_pool_grad"
);
op
->
SetInput
(
"X"
,
Input
(
"X"
));
op
->
SetInput
(
"ROIs"
,
Input
(
"ROIs"
));
op
->
SetInput
(
"Argmax"
,
Output
(
"Argmax"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
roi_pool
,
ops
::
ROIPoolOp
,
ops
::
ROIPoolOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
ops
::
ROIPoolGradDescMaker
);
REGISTER_OPERATOR
(
roi_pool_grad
,
ops
::
ROIPoolGradOp
);
REGISTER_OP_CPU_KERNEL
(
roi_pool
,
...
...
paddle/fluid/operators/scatter_op.cc
浏览文件 @
32146857
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/scatter_op.h"
#include <memory>
#include "paddle/fluid/framework/ddim.h"
namespace
paddle
{
...
...
@@ -63,13 +64,15 @@ class ScatterGradOp : public framework::OperatorWithKernel {
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"Updates"
),
ctx
->
GetInputDim
(
"Updates"
));
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
ctx
->
GetInputDim
(
"X"
));
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
ctx
->
GetInputDim
(
framework
::
GradVarName
(
"Out"
)));
}
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
(),
return
framework
::
OpKernelType
(
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
))
->
type
(),
ctx
.
device_context
());
}
};
...
...
@@ -95,12 +98,34 @@ $$
}
};
class
ScatterGradDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"scatter_grad"
);
op
->
SetInput
(
"Ids"
,
Input
(
"Ids"
));
op
->
SetInput
(
"Updates"
,
Input
(
"Updates"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Updates"
),
InputGrad
(
"Updates"
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE
(
ScatterGradNoNeedBufferVarsInference
,
"Updates"
);
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
scatter
,
ops
::
ScatterOp
,
ops
::
ScatterOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
REGISTER_OPERATOR
(
scatter_grad
,
ops
::
ScatterGradOp
);
ops
::
ScatterGradDescMaker
);
REGISTER_OPERATOR
(
scatter_grad
,
ops
::
ScatterGradOp
,
ops
::
ScatterGradNoNeedBufferVarsInference
);
REGISTER_OP_CPU_KERNEL
(
scatter
,
ops
::
ScatterOpKernel
<
float
>
);
REGISTER_OP_CPU_KERNEL
(
scatter_grad
,
ops
::
ScatterGradientOpKernel
<
float
>
);
paddle/fluid/operators/shuffle_channel_op.cc
浏览文件 @
32146857
...
...
@@ -10,6 +10,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/shuffle_channel_op.h"
#include <memory>
namespace
paddle
{
namespace
operators
{
...
...
@@ -91,13 +92,28 @@ class ShuffleChannelGradOp : public framework::OperatorWithKernel {
}
};
class
ShuffleChannelGradDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"shuffle_channel_grad"
);
op
->
SetInput
(
"X"
,
Input
(
"X"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
shuffle_channel
,
ops
::
ShuffleChannelOp
,
ops
::
ShuffleChannelOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
ops
::
ShuffleChannelOpMaker
,
ops
::
ShuffleChannelGradDescMaker
);
REGISTER_OPERATOR
(
shuffle_channel_grad
,
ops
::
ShuffleChannelGradOp
);
...
...
paddle/fluid/platform/mkldnn_reuse.h
浏览文件 @
32146857
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/data_layout_transform.h"
...
...
@@ -39,45 +40,6 @@ class MKLDNNHandler {
return
this
->
AcquireMemory
(
md
,
ptr
,
"@user_src_mem_p"
);
}
// TODO(jczaja): extract common part and make AcquireMemory
std
::
shared_ptr
<
mkldnn
::
memory
>
AcquireSrcMemory
(
const
mkldnn
::
memory
::
primitive_desc
&
mpd
,
void
*
ptr
)
{
auto
local_key
=
key_
+
"@user_src_mem_p"
;
auto
mem_p
=
std
::
static_pointer_cast
<
mkldnn
::
memory
>
(
dev_ctx_
.
GetBlob
(
local_key
));
PADDLE_ENFORCE
((
mem_p
!=
nullptr
)
||
(
is_reusing_
==
false
),
" find mem primitive in device context"
);
if
(
mem_p
==
nullptr
)
{
mem_p
=
std
::
make_shared
<
mkldnn
::
memory
>
(
mpd
,
ptr
);
dev_ctx_
.
SetBlob
(
local_key
,
mem_p
);
}
else
{
mem_p
->
set_data_handle
(
ptr
);
// Mark that reusing happenned. All primitives from operator instance
// should be reused or none of them. So we check consistency
is_reusing_
=
true
;
}
return
mem_p
;
}
std
::
shared_ptr
<
mkldnn
::
memory
>
AcquireWeightsMemory
(
const
mkldnn
::
memory
::
primitive_desc
&
mpd
,
void
*
ptr
)
{
auto
local_key
=
key_
+
"@user_weights_mem_p"
;
auto
mem_p
=
std
::
static_pointer_cast
<
mkldnn
::
memory
>
(
dev_ctx_
.
GetBlob
(
local_key
));
PADDLE_ENFORCE
((
mem_p
!=
nullptr
)
||
(
is_reusing_
==
false
),
" find mem primitive in device context"
);
if
(
mem_p
==
nullptr
)
{
mem_p
=
std
::
make_shared
<
mkldnn
::
memory
>
(
mpd
,
ptr
);
dev_ctx_
.
SetBlob
(
local_key
,
mem_p
);
}
else
{
mem_p
->
set_data_handle
(
ptr
);
// Mark that reusing happenned. All primitives from operator instance
// should be reused or none of them. So we check consistency
is_reusing_
=
true
;
}
return
mem_p
;
}
std
::
shared_ptr
<
mkldnn
::
memory
>
AcquireWeightsMemory
(
const
mkldnn
::
memory
::
desc
&
md
,
void
*
ptr
,
user_function
custom_func
=
{})
{
...
...
@@ -315,7 +277,37 @@ class TransposeMKLDNNHandler : public MKLDNNHandler {
mkldnn
::
engine
engine
,
const
std
::
string
&
base_key
)
:
platform
::
MKLDNNHandler
(
dev_ctx
,
engine
,
base_key
),
dims_
(
dims
),
axis_
(
axis
)
{}
axis_
(
axis
),
logical_axis_
(
dims
.
size
(),
0
)
{}
std
::
shared_ptr
<
mkldnn
::
memory
>
AcquireSrcMemory
(
const
mkldnn
::
memory
::
format
&
fmt
,
void
*
ptr
)
{
auto
local_key
=
key_
+
"@user_src_mem_p"
;
auto
mem_p
=
std
::
static_pointer_cast
<
mkldnn
::
memory
>
(
dev_ctx_
.
GetBlob
(
local_key
));
PADDLE_ENFORCE
((
mem_p
!=
nullptr
)
||
(
is_reusing_
==
false
),
" find mem primitive in device context"
);
if
(
mem_p
==
nullptr
)
{
// Make memory descriptor using input format, unless it
// cannot be trusted (nchw) then make up memory fmt manually
for
(
size_t
i
=
0
;
i
<
logical_axis_
.
size
();
++
i
)
{
logical_axis_
[
i
]
=
i
;
}
auto
src_md
=
fmt
!=
mkldnn
::
memory
::
format
::
nchw
?
platform
::
MKLDNNMemDesc
(
dims_
,
platform
::
MKLDNNGetDataType
<
float
>
(),
fmt
)
:
Axis2MemoryDesc
(
dims_
,
logical_axis_
);
mem_p
=
std
::
make_shared
<
mkldnn
::
memory
>
(
mkldnn
::
memory
::
primitive_desc
{
src_md
,
engine_
},
ptr
);
dev_ctx_
.
SetBlob
(
local_key
,
mem_p
);
}
else
{
mem_p
->
set_data_handle
(
ptr
);
// Mark that reusing happenned. All primitives from operator instance
// should be reused or none of them. So we check consistency
is_reusing_
=
true
;
}
return
mem_p
;
}
std
::
shared_ptr
<
mkldnn
::
memory
>
AcquireDstMemory
(
framework
::
Tensor
*
output
,
platform
::
Place
place
)
{
...
...
@@ -400,6 +392,7 @@ class TransposeMKLDNNHandler : public MKLDNNHandler {
private:
std
::
vector
<
int
>
dims_
;
std
::
vector
<
int
>
axis_
;
std
::
vector
<
int
>
logical_axis_
;
};
template
<
class
forward_t
,
class
backward_data_t
,
class
backward_weights_t
>
...
...
paddle/fluid/platform/mkldnn_utils.h
已删除
100644 → 0
浏览文件 @
d980ba19
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <mkldnn.h>
#include <string>
namespace
paddle
{
namespace
platform
{
inline
mkldnn
::
memory
::
primitive_desc
create_prim_desc_from_dims
(
const
std
::
vector
<
int
>&
ltz
,
mkldnn
::
memory
::
format
fmt
,
mkldnn
::
memory
::
data_type
data_type
=
mkldnn
::
memory
::
data_type
::
f32
)
{
mkldnn_memory_desc_t
mem_fmt
;
mem_fmt
.
primitive_kind
=
mkldnn_memory
;
mem_fmt
.
ndims
=
ltz
.
size
();
for
(
unsigned
int
i
=
0
;
i
<
ltz
.
size
();
++
i
)
{
mem_fmt
.
dims
[
i
]
=
ltz
[
i
];
// logical dimensions (nchw format,
// regardless physical layout)
}
mem_fmt
.
data_type
=
static_cast
<
mkldnn_data_type_t
>
(
data_type
);
mem_fmt
.
format
=
static_cast
<
mkldnn_memory_format_t
>
(
fmt
);
unsigned
int
total_stride
=
1
;
for
(
int
i
=
ltz
.
size
()
-
1
;
i
>=
0
;
--
i
)
{
mem_fmt
.
layout_desc
.
blocking
.
padding_dims
[
i
]
=
ltz
[
i
];
// logical dimensions (nchw format, regardless physical
// layout)
mem_fmt
.
layout_desc
.
blocking
.
block_dims
[
i
]
=
1
;
mem_fmt
.
layout_desc
.
blocking
.
offset_padding_to_data
[
i
]
=
0
;
// no offset
mem_fmt
.
layout_desc
.
blocking
.
strides
[
0
][
i
]
=
total_stride
;
mem_fmt
.
layout_desc
.
blocking
.
strides
[
1
][
i
]
=
1
;
total_stride
*=
ltz
[
i
];
}
mem_fmt
.
layout_desc
.
blocking
.
offset_padding
=
0
;
// no initial offset
auto
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
place
=
paddle
::
platform
::
CPUPlace
();
auto
*
dev_ctx
=
dynamic_cast
<
platform
::
MKLDNNDeviceContext
*>
(
pool
.
Get
(
place
));
auto
&
cpu_engine
=
dev_ctx
->
GetEngine
();
return
mkldnn
::
memory
::
primitive_desc
(
mem_fmt
,
cpu_engine
);
}
inline
mkldnn
::
memory
::
primitive_desc
create_prim_desc_from_format
(
const
std
::
vector
<
int
>&
ltz
,
const
mkldnn
::
memory
::
format
format
,
const
mkldnn
::
memory
::
data_type
data_type
)
{
auto
md
=
mkldnn
::
memory
::
desc
({
ltz
},
data_type
,
format
);
auto
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
place
=
paddle
::
platform
::
CPUPlace
();
auto
dev_ctx
=
dynamic_cast
<
platform
::
MKLDNNDeviceContext
*>
(
pool
.
Get
(
place
));
PADDLE_ENFORCE_NOT_NULL
(
dev_ctx
,
"Could not get valid device"
);
auto
&
cpu_engine
=
dev_ctx
->
GetEngine
();
return
mkldnn
::
memory
::
primitive_desc
(
md
,
cpu_engine
);
}
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/temporary_allocator.cc
浏览文件 @
32146857
...
...
@@ -14,6 +14,7 @@
#include "paddle/fluid/platform/temporary_allocator.h"
#include <memory>
#include <utility>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
DEFINE_int64
(
limit_of_tmp_allocation
,
-
1
,
...
...
@@ -30,31 +31,38 @@ namespace paddle {
namespace
platform
{
namespace
alloc
=
memory
::
allocation
;
TemporaryAllocation
::
TemporaryAllocation
(
alloc
::
AllocationPtr
&&
underlying_allocation
)
:
Allocation
(
underlying_allocation
->
ptr
(),
underlying_allocation
->
size
(),
underlying_allocation
->
place
()),
underlying_allocation_
(
std
::
move
(
underlying_allocation
))
{}
TemporaryAllocator
::
TemporaryAllocator
(
platform
::
Place
place
)
:
place_
(
place
)
{
temp_mem_map_
.
reset
(
new
std
::
multimap
<
size_t
,
alloc
::
Allocation
*>
());
temp_mem_map_
.
reset
(
new
std
::
multimap
<
size_t
,
Temporary
Allocation
*>
());
}
bool
TemporaryAllocator
::
IsAllocThreadSafe
()
const
{
return
true
;
}
void
TemporaryAllocator
::
Release
(
const
std
::
function
<
void
()
>
&
callback
)
{
std
::
unique_ptr
<
std
::
multimap
<
size_t
,
alloc
::
Allocation
*>>
t_allocations
;
std
::
unique_ptr
<
std
::
multimap
<
size_t
,
Temporary
Allocation
*>>
t_allocations
;
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mtx_
);
callback
();
t_allocations
.
swap
(
temp_mem_map_
);
temp_mem_map_
.
reset
(
new
std
::
multimap
<
size_t
,
alloc
::
Allocation
*>
());
temp_mem_map_
.
reset
(
new
std
::
multimap
<
size_t
,
Temporary
Allocation
*>
());
wait_delete_mem_
=
0
;
}
alloc
::
AllocationDeleter
deleter
;
for
(
auto
tmp
:
*
t_allocations
)
{
VLOG
(
10
)
<<
"Delete temporary allocation "
<<
tmp
.
second
->
ptr
()
<<
" size: "
<<
tmp
.
second
->
size
();
delete
r
(
tmp
.
second
)
;
delete
tmp
.
second
;
}
}
void
TemporaryAllocator
::
FreeImpl
(
alloc
::
Allocation
*
temp_allocation
)
{
void
TemporaryAllocator
::
Free
(
alloc
::
Allocation
*
allocation
)
{
auto
*
temp_allocation
=
dynamic_cast
<
TemporaryAllocation
*>
(
allocation
);
PADDLE_ENFORCE_NOT_NULL
(
temp_allocation
);
if
(
platform
::
is_gpu_place
(
temp_allocation
->
place
()))
{
PADDLE_ENFORCE
(
platform
::
is_same_place
(
temp_allocation
->
place
(),
place_
),
"The place should be the same."
);
...
...
@@ -78,7 +86,7 @@ void TemporaryAllocator::FreeImpl(alloc::Allocation *temp_allocation) {
}
VLOG
(
10
)
<<
"Delete temporary allocation "
<<
temp_allocation
->
ptr
()
<<
" size: "
<<
temp_allocation
->
size
();
alloc
::
AllocationDeleter
()(
temp_allocation
)
;
delete
temp_allocation
;
}
size_t
TemporaryAllocator
::
TemporaryAllocationQueueSize
()
{
...
...
@@ -113,9 +121,11 @@ alloc::Allocation *TemporaryAllocator::AllocateImpl(
}
// If not find the the available allocation, get allocation from
// AllocatorFacadeInstance.
auto
temp_mem
=
alloc
::
AllocatorFacade
::
Instance
().
Alloc
(
place_
,
size
,
attr
);
auto
raw_allocation
=
alloc
::
AllocatorFacade
::
Instance
().
Alloc
(
place_
,
size
,
attr
);
auto
temp_mem
=
new
TemporaryAllocation
(
std
::
move
(
raw_allocation
));
VLOG
(
10
)
<<
"Alloc temporary allocation: "
<<
temp_mem
->
ptr
()
<<
": "
<<
size
;
return
temp_mem
.
release
()
;
return
temp_mem
;
}
}
// namespace platform
...
...
paddle/fluid/platform/temporary_allocator.h
浏览文件 @
32146857
...
...
@@ -23,6 +23,14 @@
namespace
paddle
{
namespace
platform
{
class
TemporaryAllocation
:
public
memory
::
allocation
::
Allocation
{
public:
explicit
TemporaryAllocation
(
memory
::
allocation
::
AllocationPtr
&&
underlying_allocation
);
memory
::
allocation
::
AllocationPtr
underlying_allocation_
;
};
/*! \brief the TemporaryAllocator is used to alloc the temporary allocation
* which used by CUDA's async operation.
*
...
...
@@ -49,7 +57,7 @@ class TemporaryAllocator : public memory::allocation::Allocator {
void
SetCallback
(
const
std
::
function
<
void
()
>
&
callback
);
protected:
void
Free
Impl
(
memory
::
allocation
::
Allocation
*
allocation
)
override
;
void
Free
(
memory
::
allocation
::
Allocation
*
allocation
)
override
;
memory
::
allocation
::
Allocation
*
AllocateImpl
(
size_t
size
,
memory
::
allocation
::
Allocator
::
Attr
attr
)
override
;
...
...
@@ -58,8 +66,8 @@ class TemporaryAllocator : public memory::allocation::Allocator {
platform
::
Place
place_
;
// When the allocation is not held by any variable, it should be placed
// to temp_mem_map immediately.
std
::
unique_ptr
<
std
::
multimap
<
size_t
,
memory
::
allocation
::
Allocation
*>>
temp_mem_map_
{
nullptr
};
std
::
unique_ptr
<
std
::
multimap
<
size_t
,
TemporaryAllocation
*>>
temp_mem_map_
{
nullptr
};
std
::
mutex
mtx_
;
size_t
wait_delete_mem_
{
0
};
std
::
function
<
void
()
>
callback_
;
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
32146857
...
...
@@ -324,7 +324,6 @@ PYBIND11_MODULE(core, m) {
[](
Tensor
&
self
,
paddle
::
platform
::
CUDAPinnedPlace
&
place
)
{
self
.
mutable_data
<
float
>
(
place
);
})
.
def
(
"_clear"
,
&
Tensor
::
clear
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
float
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
int
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
double
>
)
...
...
@@ -1283,6 +1282,15 @@ All parameter, weight, gradient are variables in Paddle.
it will save GPU memory and may make the execution faster.
This options is only available in GPU devices.
Default False)DOC"
)
.
def_property
(
"fuse_all_optimizer_ops"
,
[](
const
BuildStrategy
&
self
)
{
return
self
.
fuse_all_optimizer_ops_
;
},
[](
BuildStrategy
&
self
,
bool
b
)
{
PADDLE_ENFORCE
(
!
self
.
IsFinalized
(),
"BuildStrategy is finlaized."
);
self
.
fuse_all_optimizer_ops_
=
b
;
})
.
def_property
(
"sync_batch_norm"
,
[](
const
BuildStrategy
&
self
)
{
return
self
.
sync_batch_norm_
;
},
...
...
paddle/fluid/string/printf.h
浏览文件 @
32146857
...
...
@@ -105,12 +105,14 @@ void Printf(const char* fmt, const Args&... args) {
Fprintf
(
std
::
cout
,
fmt
,
args
...);
}
inline
std
::
string
HumanReadableSize
(
double
f_size
)
{
template
<
typename
T
>
std
::
string
HumanReadableSize
(
T
size
)
{
size_t
i
=
0
;
double
f_size
=
static_cast
<
double
>
(
size
);
double
orig
=
f_size
;
const
std
::
vector
<
std
::
string
>
units
(
{
"B"
,
"kB"
,
"MB"
,
"GB"
,
"TB"
,
"PB"
,
"EB"
,
"ZB"
,
"YB"
});
while
(
f_size
>
=
1024
)
{
while
(
f_size
>
1024
)
{
f_size
/=
1024
;
i
++
;
}
...
...
python/paddle/fluid/__init__.py
浏览文件 @
32146857
...
...
@@ -34,7 +34,7 @@ from . import io
from
.
import
evaluator
from
.
import
initializer
from
.
import
layers
from
.
import
imperative
from
.
import
dygraph
from
.
import
contrib
from
.
import
nets
from
.
import
optimizer
...
...
@@ -71,7 +71,7 @@ __all__ = framework.__all__ + executor.__all__ + \
'initializer'
,
'layers'
,
'contrib'
,
'
imperative
'
,
'
dygraph
'
,
'transpiler'
,
'nets'
,
'optimizer'
,
...
...
python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
浏览文件 @
32146857
...
...
@@ -26,6 +26,17 @@ __all__ = [
]
def
_init_var_node
(
var_node
,
value
,
scope
,
place
):
assert
isinstance
(
value
,
np
.
ndarray
),
'The type of value should be numpy array.'
assert
scope
is
not
None
,
\
'The scope cannot be set None.'
assert
place
is
not
None
,
\
'The place cannot be set None.'
tensor
=
scope
.
var
(
var_node
.
name
()).
get_tensor
()
tensor
.
set
(
value
,
place
)
class
QuantizationTransformPass
(
object
):
def
__init__
(
self
,
scope
=
None
,
...
...
@@ -88,14 +99,14 @@ class QuantizationTransformPass(object):
assert
activation_quantize_type
!=
'channel_wise_abs_max'
,
"The activation quantization type does not support 'channel_wise_abs_max'."
if
activation_quantize_type
not
in
quant_type
:
raise
ValueError
(
"Unknown activation_quantize_type : '%s'. It can only be "
,
"'abs_max' or 'range_abs_max' or 'moving_average_abs_max'."
,
str
(
activation_quantize_type
))
"Unknown activation_quantize_type : '%s'. It can only be "
"'abs_max' or 'range_abs_max' or 'moving_average_abs_max'."
%
(
str
(
activation_quantize_type
)
))
if
weight_quantize_type
not
in
quant_type
:
raise
ValueError
(
"Unknown weight_quantize_type: '%s'. It can only be "
,
"'abs_max' or 'channel_wise_abs_max' or 'range_abs_max' or 'moving_average_abs_max'."
,
str
(
weight_quantize_type
))
"Unknown weight_quantize_type: '%s'. It can only be "
"'abs_max' or 'channel_wise_abs_max' or 'range_abs_max' or 'moving_average_abs_max'."
%
(
str
(
weight_quantize_type
)
))
self
.
_activation_quantize_type
=
activation_quantize_type
self
.
_weight_quantize_type
=
weight_quantize_type
...
...
@@ -121,8 +132,6 @@ class QuantizationTransformPass(object):
"""
assert
isinstance
(
graph
,
IrGraph
),
'graph must be the instance of IrGraph.'
#sequential_execution = core.get_pass('sequential_execution_pass')
#sequential_execution.apply(graph.graph)
self
.
_is_test
=
graph
.
is_test
()
# marked the variable which has been dequantized.
dequantized_vars
=
collections
.
OrderedDict
()
...
...
@@ -203,9 +212,12 @@ class QuantizationTransformPass(object):
var_type
=
core
.
VarDesc
.
VarType
.
LOD_TENSOR
,
shape
=
[
1
],
var_dtype
=
core
.
VarDesc
.
VarType
.
INT64
)
self
.
_init_var_node
(
global_step_in
,
np
.
zeros
(
[
1
],
dtype
=
'int64'
))
_init_var_node
(
global_step_in
,
np
.
zeros
(
[
1
],
dtype
=
'int64'
),
self
.
_scope
,
self
.
_place
)
global_step_out
=
graph
.
create_var_node_from_desc
(
global_step_in
.
var
())
# The attribute of `op_role` is needed by ParallelExecutor.
...
...
@@ -284,7 +296,12 @@ class QuantizationTransformPass(object):
var_dtype
=
var_node
.
dtype
())
data_type
=
'float64'
if
var_node
.
dtype
(
)
==
core
.
VarDesc
.
VarType
.
FP64
else
'float32'
self
.
_init_var_node
(
scale_in_node
,
np
.
array
([
0.001
],
dtype
=
data_type
))
_init_var_node
(
scale_in_node
,
np
.
array
(
[
0.001
],
dtype
=
data_type
),
self
.
_scope
,
self
.
_place
)
scale_out_node
=
graph
.
create_var_node_from_desc
(
scale_in_node
.
var
())
inputs
=
{
'X'
:
var_node
,
'InScale'
:
scale_in_node
}
...
...
@@ -299,9 +316,13 @@ class QuantizationTransformPass(object):
var_dtype
=
var_node
.
dtype
())
data_type
=
'float64'
if
var_node
.
dtype
(
)
==
core
.
VarDesc
.
VarType
.
FP64
else
'float32'
self
.
_init_var_node
(
scales_node
,
np
.
zeros
(
[
self
.
_window_size
],
dtype
=
data_type
))
_init_var_node
(
scales_node
,
np
.
zeros
(
[
self
.
_window_size
],
dtype
=
data_type
),
self
.
_scope
,
self
.
_place
)
inputs
[
'Iter'
]
=
self
.
_global_step
outputs
[
'OutScales'
]
=
scales_node
attrs
=
{
...
...
@@ -343,7 +364,12 @@ class QuantizationTransformPass(object):
var_dtype
=
var_node
.
dtype
())
data_type
=
'float64'
if
var_node
.
dtype
(
)
==
core
.
VarDesc
.
VarType
.
FP64
else
'float32'
self
.
_init_var_node
(
scale_in_node
,
np
.
array
([
0.001
],
dtype
=
data_type
))
_init_var_node
(
scale_in_node
,
np
.
array
(
[
0.001
],
dtype
=
data_type
),
self
.
_scope
,
self
.
_place
)
scale_out_node
=
graph
.
create_var_node_from_desc
(
scale_in_node
.
var
())
ins
=
{
'X'
:
var_node
,
'InScale'
:
scale_in_node
}
...
...
@@ -356,13 +382,23 @@ class QuantizationTransformPass(object):
shape
=
[
1
])
data_type
=
'float64'
if
var_node
.
dtype
(
)
==
core
.
VarDesc
.
VarType
.
FP64
else
'float32'
self
.
_init_var_node
(
scale_in_node
,
np
.
ones
([
1
],
dtype
=
data_type
))
_init_var_node
(
scale_in_node
,
np
.
ones
(
[
1
],
dtype
=
data_type
),
self
.
_scope
,
self
.
_place
)
accum_in_node
=
graph
.
create_persistable_node
(
name
=
unique_name
.
generate
(
'accum'
),
var_type
=
core
.
VarDesc
.
VarType
.
LOD_TENSOR
,
var_dtype
=
var_node
.
dtype
(),
shape
=
[
1
])
self
.
_init_var_node
(
accum_in_node
,
np
.
ones
([
1
],
dtype
=
data_type
))
_init_var_node
(
accum_in_node
,
np
.
ones
(
[
1
],
dtype
=
data_type
),
self
.
_scope
,
self
.
_place
)
state_out_node
=
graph
.
create_var_node_from_desc
(
state_in_node
.
var
(
))
accum_out_node
=
graph
.
create_var_node_from_desc
(
accum_in_node
.
var
(
...
...
@@ -482,16 +518,6 @@ class QuantizationTransformPass(object):
graph
.
link_to
(
dequant_op_node
,
dequant_var_node
)
return
dequant_var_node
def
_init_var_node
(
self
,
var_node
,
value
):
assert
isinstance
(
value
,
np
.
ndarray
),
'The type of value should be numpy array.'
assert
self
.
_scope
is
not
None
,
\
'The scope cannot be set None when activation_quantize_type equals to range_abs_max.'
assert
self
.
_place
is
not
None
,
\
'The place cannot be set None when activation_quantize_type equals to range_abs_max.'
tensor
=
self
.
_scope
.
var
(
var_node
.
name
()).
get_tensor
()
tensor
.
set
(
value
,
self
.
_place
)
def
_quantized_var_name
(
self
,
var_name
):
"""
Return quantized variable name for the input `var_name`.
...
...
@@ -594,8 +620,8 @@ class QuantizationFreezePass(object):
self
.
_weight_bits
)
self
.
_restore_var
(
input_arg_name
,
quantized_param_v
)
else
:
scale_v
=
self
.
_to_node
(
op_node
.
outputs
,
op_node
.
output
(
'OutScale'
)[
0
])
scale_v
=
graph
.
_find_node_by_name
(
op_node
.
outputs
,
op_node
.
output
(
'OutScale'
)[
0
])
self
.
_var_scale_map
[
input_arg_name
]
=
scale_v
ops
=
graph
.
all_op_nodes
()
...
...
@@ -627,8 +653,8 @@ class QuantizationFreezePass(object):
return
graph
def
_remove_fake_quant_and_dequant_op
(
self
,
graph
,
op_node
):
k
=
self
.
_to_nod
e
(
op_node
.
outputs
,
op_node
.
output
(
'Out'
)[
0
])
v
=
self
.
_to_nod
e
(
op_node
.
inputs
,
op_node
.
input
(
'X'
)[
0
])
k
=
graph
.
_find_node_by_nam
e
(
op_node
.
outputs
,
op_node
.
output
(
'Out'
)[
0
])
v
=
graph
.
_find_node_by_nam
e
(
op_node
.
inputs
,
op_node
.
input
(
'X'
)[
0
])
if
v
.
node
not
in
self
.
_op_input_rename_map
:
self
.
_op_input_rename_map
[
k
.
node
]
=
v
else
:
...
...
@@ -663,8 +689,8 @@ class QuantizationFreezePass(object):
raise
ValueError
(
"Only support one output, but op %s has"
" more than one output."
%
(
op_node
.
name
()))
output_var_node
=
self
.
_to_node
(
op_node
.
outputs
,
op_node
.
output_arg_names
()[
0
])
output_var_node
=
graph
.
_find_node_by_name
(
op_node
.
outputs
,
op_node
.
output_arg_names
()[
0
])
weight_scale_node
=
graph
.
create_persistable_node
(
name
=
unique_name
.
generate
(
'channel_scale'
),
var_type
=
core
.
VarDesc
.
VarType
.
LOD_TENSOR
,
...
...
@@ -672,7 +698,9 @@ class QuantizationFreezePass(object):
var_dtype
=
output_var_node
.
dtype
())
data_type
=
'float64'
if
output_var_node
.
dtype
(
)
==
core
.
VarDesc
.
VarType
.
FP64
else
'float32'
self
.
_init_var_node
(
weight_scale_node
,
channel_scale
.
astype
(
data_type
))
_init_var_node
(
weight_scale_node
,
channel_scale
.
astype
(
data_type
),
self
.
_scope
,
self
.
_place
)
dequant_var_node
=
graph
.
create_var_node
(
name
=
self
.
_dequantized_var_name
(
output_var_node
.
name
()),
var_type
=
output_var_node
.
type
(),
...
...
@@ -724,8 +752,8 @@ class QuantizationFreezePass(object):
raise
ValueError
(
"Only support one output, but op %s has"
" more than one output."
%
(
op_node
.
name
()))
output_var_node
=
self
.
_to_node
(
op_node
.
outputs
,
op_node
.
output_arg_names
()[
0
])
output_var_node
=
graph
.
_find_node_by_name
(
op_node
.
outputs
,
op_node
.
output_arg_names
()[
0
])
dequant_var_node
=
graph
.
create_var_node
(
name
=
self
.
_dequantized_var_name
(
output_var_node
.
name
()),
var_type
=
output_var_node
.
type
(),
...
...
@@ -746,24 +774,6 @@ class QuantizationFreezePass(object):
self
.
_op_output_rename_map
[
output_var_node
.
node
]
=
dequant_var_node
return
dequant_var_node
def
_init_var_node
(
self
,
var_node
,
value
):
assert
isinstance
(
value
,
np
.
ndarray
),
'The type of value should be numpy array.'
assert
self
.
_scope
is
not
None
,
\
'The scope cannot be set None when activation_quantize_type equals to range_abs_max.'
assert
self
.
_place
is
not
None
,
\
'The place cannot be set None when activation_quantize_type equals to range_abs_max.'
tensor
=
self
.
_scope
.
var
(
var_node
.
name
()).
get_tensor
()
tensor
.
set
(
value
,
self
.
_place
)
def
_to_node
(
self
,
nodes
,
node_name
):
target_node
=
None
for
n
in
nodes
:
if
n
.
name
()
==
node_name
:
target_node
=
n
assert
target_node
is
not
None
,
"Cannot find the target node in the giving set."
return
target_node
def
_load_var
(
self
,
name
):
return
np
.
array
(
self
.
_scope
.
find_var
(
name
).
get_tensor
())
...
...
python/paddle/fluid/contrib/slim/quantization/quantization_strategy.py
浏览文件 @
32146857
...
...
@@ -45,6 +45,7 @@ class QuantizationStrategy(Strategy):
activation_bits
=
8
,
weight_bits
=
8
,
activation_quantize_type
=
'abs_max'
,
weight_quantize_type
=
'abs_max'
,
save_in_nodes
=
None
,
save_out_nodes
=
None
):
"""
...
...
@@ -66,6 +67,8 @@ class QuantizationStrategy(Strategy):
dynamically each step in both training and testing period. If use
'range_abs_max', a static quantization scale will be calculated
during training and used in inference.
weight_quantize_type (str): quantization type for weights, support 'abs_max' and 'channel_wise_abs_max'.
The 'range_abs_max' usually is not used for weight, since weights are fixed once the model is well trained.
save_in_nodes(list<str>): A list of variable names used to prune graph
for saving inference model.
save_out_nodes(list<str>): A list of variable names used to prune graph
...
...
@@ -81,6 +84,7 @@ class QuantizationStrategy(Strategy):
self
.
activation_bits
=
activation_bits
self
.
weight_bits
=
weight_bits
self
.
activation_quantize_type
=
activation_quantize_type
self
.
weight_quantize_type
=
weight_quantize_type
self
.
save_out_nodes
=
save_out_nodes
self
.
save_in_nodes
=
save_in_nodes
...
...
@@ -100,7 +104,8 @@ class QuantizationStrategy(Strategy):
place
=
context
.
place
,
weight_bits
=
self
.
weight_bits
,
activation_bits
=
self
.
activation_bits
,
activation_quantize_type
=
self
.
activation_quantize_type
)
activation_quantize_type
=
self
.
activation_quantize_type
,
weight_quantize_type
=
self
.
weight_quantize_type
)
transform_pass
.
apply
(
train_ir_graph
)
transform_pass
.
apply
(
test_ir_graph
)
...
...
@@ -134,7 +139,8 @@ class QuantizationStrategy(Strategy):
scope
=
context
.
scope
,
place
=
context
.
place
,
weight_bits
=
self
.
weight_bits
,
activation_bits
=
self
.
activation_bits
)
activation_bits
=
self
.
activation_bits
,
weight_quantize_type
=
self
.
weight_quantize_type
)
freeze_pass
.
apply
(
test_ir_graph
)
# for other strategies
...
...
python/paddle/fluid/contrib/slim/tests/quantization/compress.yaml
浏览文件 @
32146857
...
...
@@ -35,6 +35,8 @@ strategies:
start_epoch
:
0
end_epoch
:
0
float_model_save_path
:
'
./output/float'
mobile_model_save_path
:
'
./output/mobile'
int8_model_save_path
:
'
./output/int8'
weight_bits
:
8
activation_bits
:
8
weight_quantize_type
:
'
abs_max'
...
...
python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py
浏览文件 @
32146857
...
...
@@ -256,8 +256,6 @@ class TestQuantizationFreezePass(unittest.TestCase):
place
=
place
,
activation_quantize_type
=
activation_quant_type
,
weight_quantize_type
=
weight_quant_type
)
#transform_pass = QuantizationTransformPass(
# scope=scope, place=place, activation_quantize_type=activation_quant_type)
transform_pass
.
apply
(
main_graph
)
transform_pass
.
apply
(
test_graph
)
dev_name
=
'_gpu_'
if
use_cuda
else
'_cpu_'
...
...
@@ -315,7 +313,6 @@ class TestQuantizationFreezePass(unittest.TestCase):
# Freeze graph for inference, but the weight of fc/conv is still float type.
freeze_pass
=
QuantizationFreezePass
(
scope
=
scope
,
place
=
place
,
weight_quantize_type
=
weight_quant_type
)
#freeze_pass = QuantizationFreezePass(scope=scope, place=place)
freeze_pass
.
apply
(
test_graph
)
if
not
for_ci
:
marked_nodes
=
set
()
...
...
python/paddle/fluid/
imperative
/__init__.py
→
python/paddle/fluid/
dygraph
/__init__.py
浏览文件 @
32146857
文件已移动
python/paddle/fluid/
imperative
/base.py
→
python/paddle/fluid/
dygraph
/base.py
浏览文件 @
32146857
...
...
@@ -22,7 +22,7 @@ __all__ = ['enabled', 'guard', 'to_variable']
def
enabled
():
return
framework
.
_in_
imperative
_mode
()
return
framework
.
_in_
dygraph
_mode
()
@
signature_safe_contextmanager
...
...
@@ -39,14 +39,14 @@ def guard(place=None):
with
framework
.
program_guard
(
train
,
startup
):
with
framework
.
unique_name
.
guard
():
with
framework
.
_
imperative
_guard
(
tracer
):
with
framework
.
_
imperative
_place_guard
(
place
):
with
framework
.
_
dygraph
_guard
(
tracer
):
with
framework
.
_
dygraph
_place_guard
(
place
):
yield
def
to_variable
(
value
,
block
=
None
,
name
=
None
):
if
isinstance
(
value
,
np
.
ndarray
):
assert
enabled
(),
"to_variable could only be called in
imperative
mode"
assert
enabled
(),
"to_variable could only be called in
dygraph
mode"
if
not
block
:
block
=
framework
.
default_main_program
().
current_block
()
...
...
python/paddle/fluid/
imperative
/checkpoint.py
→
python/paddle/fluid/
dygraph
/checkpoint.py
浏览文件 @
32146857
...
...
@@ -68,7 +68,7 @@ def save_persistables(vardict, dirname, filename=None):
dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden,
init_cell)
param_path = "./my_paddle_model"
fluid.
imperative.checkpoint
.save_persistables(ptb_model.state_dict(), dirname=param_path,
fluid.
dygraph
.save_persistables(ptb_model.state_dict(), dirname=param_path,
layer=ptb_model)
"""
if
isinstance
(
vardict
,
collections
.
OrderedDict
):
...
...
@@ -97,17 +97,17 @@ def load_persistables(vardict, dirname, filename=None):
Examples:
.. code-block:: python
my_layer = layer(fluid.
imperative
.Layer)
my_layer = layer(fluid.
dygraph
.Layer)
param_path = "./my_paddle_model"
param_dict = fluid.
imperative.checkpoint
.load_persistables(my_layer.parameters(), param_path)
param_dict = fluid.
dygraph
.load_persistables(my_layer.parameters(), param_path)
param_1 = param_dict['PtbModel_0.w_1']
or:
my_layer = layer(fluid.
imperative
.Layer)
my_layer = layer(fluid.
dygraph
.Layer)
param_path = "./my_paddle_model"
filename = "model.file"
param_dict = fluid.
imperative.checkpoint
.load_persistables(my_layer.state_dict(), param_path,
param_dict = fluid.
dygraph
.load_persistables(my_layer.state_dict(), param_path,
filename=filename)
param_1 = param_dict['PtbModel_0.w_1']
...
...
python/paddle/fluid/
imperative
/layer_object_helper.py
→
python/paddle/fluid/
dygraph
/layer_object_helper.py
浏览文件 @
32146857
...
...
@@ -16,7 +16,7 @@ from __future__ import print_function
import
copy
import
six
from
..framework
import
Parameter
,
_in_
imperative
_mode
from
..framework
import
Parameter
,
_in_
dygraph
_mode
from
..param_attr
import
ParamAttr
from
..
import
core
from
six.moves
import
zip
...
...
python/paddle/fluid/
imperative
/layers.py
→
python/paddle/fluid/
dygraph
/layers.py
浏览文件 @
32146857
...
...
@@ -283,7 +283,7 @@ class PyLayer(core.PyLayer):
@
classmethod
def
__call__
(
cls
,
*
inputs
):
tracer
=
framework
.
_
imperative
_tracer
()
tracer
=
framework
.
_
dygraph
_tracer
()
block
=
framework
.
default_main_program
().
current_block
()
ivar_inputs
=
[
x
.
_ivar
for
x
in
inputs
]
...
...
python/paddle/fluid/
imperative
/nn.py
→
python/paddle/fluid/
dygraph
/nn.py
浏览文件 @
32146857
...
...
@@ -134,7 +134,7 @@ class Conv2D(layers.Layer):
outputs
=
{
'Out'
:
[
pre_act
]},
attrs
=
{
'axis'
:
1
})
# Currently, we don't support inplace in
imperative
mode
# Currently, we don't support inplace in
dygraph
mode
return
self
.
_helper
.
append_activation
(
pre_act
,
act
=
self
.
_act
)
...
...
@@ -460,7 +460,7 @@ class FC(layers.Layer):
attrs
=
{
'axis'
:
self
.
_num_flatten_dims
})
else
:
pre_activation
=
pre_bias
# Currently, we don't support inplace in
imperative
mode
# Currently, we don't support inplace in
dygraph
mode
return
self
.
_helper
.
append_activation
(
pre_activation
,
act
=
self
.
_act
)
...
...
@@ -582,7 +582,7 @@ class BatchNorm(layers.Layer):
"use_global_stats"
:
self
.
_use_global_stats
})
# Currently, we don't support inplace in
imperative
mode
# Currently, we don't support inplace in
dygraph
mode
return
self
.
_helper
.
append_activation
(
batch_norm_out
,
self
.
_act
)
...
...
@@ -621,7 +621,7 @@ class Embedding(layers.Layer):
dict_size = len(dataset.ids)
input = fluid.layers.data(name='ids', shape=[32, 32], dtype='float32')
embedding = fluid.
imperative
.Embedding(size=[dict_size, 16])
embedding = fluid.
dygraph
.Embedding(size=[dict_size, 16])
fc = embedding(input)
"""
...
...
python/paddle/fluid/
imperative
/profiler.py
→
python/paddle/fluid/
dygraph
/profiler.py
浏览文件 @
32146857
文件已移动
python/paddle/fluid/
imperative
/tracer.py
→
python/paddle/fluid/
dygraph
/tracer.py
浏览文件 @
32146857
...
...
@@ -24,12 +24,12 @@ __all__ = ['Tracer']
def
release_op
(
op
):
del
framework
.
_
imperative
_tracer
().
_ops
[
op
.
_trace_id
]
del
framework
.
_
dygraph
_tracer
().
_ops
[
op
.
_trace_id
]
class
Tracer
(
core
.
Tracer
):
"""
Python wrapper of
imperative
tracer
Python wrapper of
dygraph
tracer
"""
def
__init__
(
self
,
block
):
...
...
python/paddle/fluid/framework.py
浏览文件 @
32146857
...
...
@@ -75,20 +75,20 @@ GRAD_VAR_SUFFIX = core.kGradVarSuffix()
ZERO_VAR_SUFFIX
=
core
.
kZeroVarSuffix
()
CONTROL_DEP_VAR_PREFIX
=
core
.
kControlDepVarName
()
_
imperative
_tracer_
=
None
_
imperative
_current_expected_place_
=
None
_
dygraph
_tracer_
=
None
_
dygraph
_current_expected_place_
=
None
def
_in_
imperative
_mode
():
return
_
imperative
_tracer_
is
not
None
def
_in_
dygraph
_mode
():
return
_
dygraph
_tracer_
is
not
None
def
_
imperative
_tracer
():
return
_
imperative
_tracer_
def
_
dygraph
_tracer
():
return
_
dygraph
_tracer_
def
_current_expected_place
():
return
_
imperative
_current_expected_place_
return
_
dygraph
_current_expected_place_
def
_cpu_num
():
...
...
@@ -396,7 +396,7 @@ class Variable(object):
if
not
isinstance
(
dtype
,
core
.
VarDesc
.
VarType
):
dtype
=
convert_np_dtype_to_dtype_
(
dtype
)
if
_in_
imperative
_mode
():
if
_in_
dygraph
_mode
():
# record vars in tracer rather than blocks
self
.
_ivar
=
kwargs
.
get
(
"ivar"
,
None
)
if
not
self
.
_ivar
:
...
...
@@ -406,7 +406,7 @@ class Variable(object):
_current_expected_place
(),
stop_gradient
,
True
if
persistable
else
False
)
if
persistable
:
_
imperative
_tracer
().
trace_var
(
name
,
self
)
_
dygraph
_tracer
().
trace_var
(
name
,
self
)
else
:
self
.
error_clip
=
error_clip
...
...
@@ -515,8 +515,8 @@ class Variable(object):
Returns:
str: The debug string.
"""
if
_in_
imperative
_mode
():
# TODO(panyx0718): add more
imperative
debug info.
if
_in_
dygraph
_mode
():
# TODO(panyx0718): add more
dygraph
debug info.
return
'name %s, dtype: %s shape: %s'
%
(
self
.
name
,
self
.
dtype
,
self
.
shape
)
...
...
@@ -548,42 +548,42 @@ class Variable(object):
@
property
def
_stop_gradient
(
self
):
if
_in_
imperative
_mode
():
if
_in_
dygraph
_mode
():
return
self
.
_ivar
.
stop_gradient
else
:
return
self
.
stop_gradient
@
_stop_gradient
.
setter
def
_stop_gradient
(
self
,
s
):
if
_in_
imperative
_mode
():
if
_in_
dygraph
_mode
():
self
.
_ivar
.
stop_gradient
=
s
else
:
self
.
stop_gradient
=
s
@
property
def
persistable
(
self
):
if
_in_
imperative
_mode
():
if
_in_
dygraph
_mode
():
return
self
.
_ivar
.
persistable
else
:
return
self
.
desc
.
persistable
()
@
persistable
.
setter
def
persistable
(
self
,
p
):
if
_in_
imperative
_mode
():
if
_in_
dygraph
_mode
():
return
self
.
_ivar
.
persistable
else
:
self
.
desc
.
set_persistable
(
p
)
@
property
def
name
(
self
):
if
_in_
imperative
_mode
():
if
_in_
dygraph
_mode
():
return
self
.
_ivar
.
name
else
:
return
cpt
.
to_text
(
self
.
desc
.
name
())
@
name
.
setter
def
name
(
self
,
new_name
):
if
_in_
imperative
_mode
():
if
_in_
dygraph
_mode
():
self
.
_ivar
.
name
=
new_name
else
:
self
.
desc
.
set_name
(
new_name
)
...
...
@@ -591,26 +591,26 @@ class Variable(object):
@
property
def
shape
(
self
):
# convert to tuple, make it as same as numpy API.
if
_in_
imperative
_mode
():
if
_in_
dygraph
_mode
():
return
self
.
_ivar
.
shape
else
:
return
tuple
(
self
.
desc
.
shape
())
@
property
def
dtype
(
self
):
if
_in_
imperative
_mode
():
if
_in_
dygraph
_mode
():
return
self
.
_ivar
.
dtype
else
:
return
self
.
desc
.
dtype
()
@
property
def
lod_level
(
self
):
# TODO(minqiyang): Support lod_level in
imperative
mode
# TODO(minqiyang): Support lod_level in
dygraph
mode
return
self
.
desc
.
lod_level
()
@
property
def
type
(
self
):
if
_in_
imperative
_mode
():
if
_in_
dygraph
_mode
():
return
self
.
_ivar
.
dtype
else
:
return
self
.
desc
.
type
()
...
...
@@ -918,7 +918,7 @@ class Operator(object):
inputs
=
None
,
outputs
=
None
,
attrs
=
None
):
if
_in_
imperative
_mode
():
if
_in_
dygraph
_mode
():
if
type
is
None
:
raise
ValueError
(
"`type` to initialized an Operator can not be None."
)
...
...
@@ -1037,7 +1037,7 @@ class Operator(object):
for
arg
in
out_args
:
out_arg_names
.
append
(
cpt
.
to_text
(
arg
.
name
))
# TODO(minqiyang): could we remove variable's op in static mode?
if
not
_in_
imperative
_mode
():
if
not
_in_
dygraph
_mode
():
arg
.
op
=
self
self
.
desc
.
set_output
(
out_proto
.
name
,
out_arg_names
)
...
...
@@ -1083,7 +1083,7 @@ class Operator(object):
@
property
def
type
(
self
):
if
_in_
imperative
_mode
():
if
_in_
dygraph
_mode
():
return
self
.
iop
.
type
else
:
return
self
.
desc
.
type
()
...
...
@@ -1626,7 +1626,7 @@ class Block(object):
Returns:
Operator: the append Operator.
"""
if
_in_
imperative
_mode
():
if
_in_
dygraph
_mode
():
op
=
Operator
(
block
=
self
,
desc
=
None
,
...
...
@@ -1638,9 +1638,8 @@ class Block(object):
# record ops in tracer rather than blocks
#
# TODO(minqiyang): add op stop_gradient support in static mode too.
# currently, we only support stop_gradient in imperative mode.
_imperative_tracer
().
trace_op
(
op
,
kwargs
.
get
(
"stop_gradient"
,
False
))
# currently, we only support stop_gradient in dygraph mode.
_dygraph_tracer
().
trace_op
(
op
,
kwargs
.
get
(
"stop_gradient"
,
False
))
else
:
op_desc
=
self
.
desc
.
append_op
()
op
=
Operator
(
...
...
@@ -1699,7 +1698,7 @@ class Block(object):
return
self
.
ops
[
start
:
end
]
def
_prepend_op
(
self
,
*
args
,
**
kwargs
):
if
_in_
imperative
_mode
():
if
_in_
dygraph
_mode
():
op
=
Operator
(
self
,
None
,
...
...
@@ -1707,8 +1706,7 @@ class Block(object):
inputs
=
kwargs
.
get
(
"inputs"
,
None
),
outputs
=
kwargs
.
get
(
"outputs"
,
None
),
attrs
=
kwargs
.
get
(
"attrs"
,
None
))
_imperative_tracer
().
trace_op
(
op
,
kwargs
.
get
(
"stop_gradient"
,
False
))
_dygraph_tracer
().
trace_op
(
op
,
kwargs
.
get
(
"stop_gradient"
,
False
))
else
:
op_desc
=
self
.
desc
.
_prepend_op
()
op
=
Operator
(
...
...
@@ -2347,40 +2345,6 @@ class IrGraph(object):
"""
return
{
IrOpNode
(
node
)
for
node
in
self
.
graph
.
nodes
()
if
node
.
is_op
()}
def
_find_var_node
(
self
,
key
):
"""
Get a variable node by the `key` from this graph. The key
can be a node name or a node id.
WARNS:
There are some nodes may have the same name. So, be
cautious about using this method when you find the
target var node by its name.
Args:
key(str|int): The str type denotes that the target variable node's name.
And the int type denotes that the target variable node's id.
Raises:
ValueError: If this graph doesn't have a variable with the giving name or id.
Returns:
IrVarNode: the variable node with the giving name or id.
"""
target_var_node
=
None
var_nodes
=
self
.
all_var_nodes
()
if
isinstance
(
key
,
six
.
string_types
):
for
var_node
in
var_nodes
:
if
var_node
.
name
()
==
key
:
target_var_node
=
var_node
elif
isinstance
(
key
,
int
):
for
var_node
in
var_nodes
:
if
var_node
.
id
()
==
key
:
target_var_node
=
var_node
if
target_var_node
is
None
:
raise
ValueError
(
"var_node %s not in this graph"
%
key
)
return
target_var_node
def
create_persistable_node
(
self
,
name
,
var_type
,
shape
,
var_dtype
):
"""
Create a persistable variable node in the graph. In IrGraph,
...
...
@@ -2525,14 +2489,6 @@ class IrGraph(object):
core
.
graph_safe_remove_nodes
(
self
.
graph
,
original_nodes
)
def
resolve_hazard
(
self
):
def
_to_node
(
nodes
,
node_name
):
target_node
=
None
for
n
in
nodes
:
if
n
.
name
()
==
node_name
:
target_node
=
n
assert
target_node
is
not
None
,
"Cannot find the target node in the giving set."
return
target_node
ordered_nodes
=
core
.
topology_sort
(
self
.
graph
)
var_nodes
=
dict
()
for
node
in
ordered_nodes
:
...
...
@@ -2540,16 +2496,17 @@ class IrGraph(object):
for
each_var_name
in
node
.
op
().
input_arg_names
():
if
each_var_name
not
in
var_nodes
:
var_nodes
[
each_var_name
]
=
[
_to_nod
e
(
node
.
inputs
,
each_var_name
)
self
.
_find_node_by_nam
e
(
node
.
inputs
,
each_var_name
)
]
for
each_var_name
in
node
.
op
().
output_arg_names
():
if
each_var_name
not
in
var_nodes
:
var_nodes
[
each_var_name
]
=
[
_to_nod
e
(
node
.
outputs
,
each_var_name
)
self
.
_find_node_by_nam
e
(
node
.
outputs
,
each_var_name
)
]
else
:
var_nodes
[
each_var_name
].
append
(
_to_node
(
node
.
outputs
,
each_var_name
))
self
.
_find_node_by_name
(
node
.
outputs
,
each_var_name
))
self
.
graph
.
resolve_hazard
(
var_nodes
)
def
has_circle
(
self
):
...
...
@@ -2662,6 +2619,17 @@ class IrGraph(object):
program
=
Program
.
_construct_from_desc
(
desc
)
return
program
def
_find_node_by_name
(
self
,
nodes
,
node_name
):
"""
Find a node in the giving nodes set by the name.
"""
target_node
=
None
for
n
in
nodes
:
if
n
.
name
()
==
node_name
:
target_node
=
n
assert
target_node
is
not
None
,
"Cannot find the target node in the giving set."
return
target_node
def
_update_desc_attr
(
self
,
desc
,
name
,
val
):
"""
Update the value of desc's attribute by attribute's name.
...
...
@@ -3541,22 +3509,22 @@ def _get_var(name, program=None):
@
signature_safe_contextmanager
def
_
imperative
_guard
(
tracer
):
global
_
imperative
_tracer_
tmp_trace
=
_
imperative
_tracer_
_
imperative
_tracer_
=
tracer
def
_
dygraph
_guard
(
tracer
):
global
_
dygraph
_tracer_
tmp_trace
=
_
dygraph
_tracer_
_
dygraph
_tracer_
=
tracer
yield
_
imperative
_tracer_
=
tmp_trace
_
dygraph
_tracer_
=
tmp_trace
@
signature_safe_contextmanager
def
_
imperative
_place_guard
(
place
):
global
_
imperative
_current_expected_place_
tmp_place
=
_
imperative
_current_expected_place_
_
imperative
_current_expected_place_
=
place
def
_
dygraph
_place_guard
(
place
):
global
_
dygraph
_current_expected_place_
tmp_place
=
_
dygraph
_current_expected_place_
_
dygraph
_current_expected_place_
=
place
yield
_
imperative
_current_expected_place_
=
tmp_place
_
dygraph
_current_expected_place_
=
tmp_place
python/paddle/fluid/initializer.py
浏览文件 @
32146857
...
...
@@ -165,7 +165,7 @@ class ConstantInitializer(Initializer):
'force_cpu'
:
self
.
_force_cpu
or
force_init_on_cpu
()
},
stop_gradient
=
True
)
if
not
framework
.
_in_
imperative
_mode
():
if
not
framework
.
_in_
dygraph
_mode
():
var
.
op
=
op
return
op
...
...
@@ -245,7 +245,7 @@ class UniformInitializer(Initializer):
attrs
=
{
"in_dtype"
:
out_var
.
dtype
,
"out_dtype"
:
var
.
dtype
})
if
not
framework
.
_in_
imperative
_mode
():
if
not
framework
.
_in_
dygraph
_mode
():
var
.
op
=
op
return
op
...
...
@@ -324,7 +324,7 @@ class NormalInitializer(Initializer):
outputs
=
{
"Out"
:
var
},
attrs
=
{
"in_dtype"
:
out_var
.
dtype
,
"out_dtype"
:
var
.
dtype
})
if
not
framework
.
_in_
imperative
_mode
():
if
not
framework
.
_in_
dygraph
_mode
():
var
.
op
=
op
return
op
...
...
@@ -403,7 +403,7 @@ class TruncatedNormalInitializer(Initializer):
outputs
=
{
"Out"
:
var
},
attrs
=
{
"in_dtype"
:
out_var
.
dtype
,
"out_dtype"
:
var
.
dtype
})
if
not
framework
.
_in_
imperative
_mode
():
if
not
framework
.
_in_
dygraph
_mode
():
var
.
op
=
op
return
op
...
...
@@ -509,7 +509,7 @@ class XavierInitializer(Initializer):
"seed"
:
self
.
_seed
},
stop_gradient
=
True
)
if
not
framework
.
_in_
imperative
_mode
():
if
not
framework
.
_in_
dygraph
_mode
():
var
.
op
=
op
return
op
...
...
@@ -610,7 +610,7 @@ class MSRAInitializer(Initializer):
"seed"
:
self
.
_seed
},
stop_gradient
=
True
)
if
not
framework
.
_in_
imperative
_mode
():
if
not
framework
.
_in_
dygraph
_mode
():
var
.
op
=
op
return
op
...
...
@@ -709,7 +709,7 @@ class BilinearInitializer(Initializer):
'shape'
:
list
(
shape
),
value_name
:
values
})
if
not
framework
.
_in_
imperative
_mode
():
if
not
framework
.
_in_
dygraph
_mode
():
var
.
op
=
op
return
op
...
...
@@ -768,7 +768,7 @@ class NumpyArrayInitializer(Initializer):
value_name
:
values
},
stop_gradient
=
True
)
if
not
framework
.
_in_
imperative
_mode
():
if
not
framework
.
_in_
dygraph
_mode
():
var
.
op
=
op
return
op
...
...
python/paddle/fluid/install_check.py
浏览文件 @
32146857
...
...
@@ -17,7 +17,7 @@ from .param_attr import ParamAttr
from
.initializer
import
Constant
from
.
import
layers
from
.
import
backward
from
.
imperative
import
Layer
,
nn
from
.
dygraph
import
Layer
,
nn
from
.
import
executor
from
.
import
core
...
...
python/paddle/fluid/layer_helper.py
浏览文件 @
32146857
...
...
@@ -17,7 +17,7 @@ from __future__ import print_function
import
copy
import
six
from
.framework
import
Parameter
,
dtype_is_floating
,
_in_
imperative
_mode
from
.framework
import
Parameter
,
dtype_is_floating
,
_in_
dygraph
_mode
from
.
import
unique_name
from
paddle.fluid.initializer
import
Constant
,
Xavier
from
.param_attr
import
ParamAttr
...
...
@@ -30,9 +30,9 @@ class LayerHelper(LayerHelperBase):
def
__init__
(
self
,
layer_type
,
**
kwargs
):
self
.
kwargs
=
kwargs
name
=
self
.
kwargs
.
get
(
'name'
,
None
)
# TODO(panyx0718, minqiyang):
imperative
mode
# TODO(panyx0718, minqiyang):
dygraph
mode
# can not use both `layer_type` and `name`. Deprecate LayerHelper
# and write a Helper for
imperative
mode.
# and write a Helper for
dygraph
mode.
if
name
is
None
:
self
.
kwargs
[
'name'
]
=
unique_name
.
generate
(
layer_type
)
...
...
python/paddle/fluid/layer_helper_base.py
浏览文件 @
32146857
...
...
@@ -17,7 +17,7 @@ from __future__ import print_function
import
copy
import
numpy
as
np
from
.framework
import
Variable
,
default_main_program
,
default_startup_program
,
_in_
imperative
_mode
,
_current_expected_place
from
.framework
import
Variable
,
default_main_program
,
default_startup_program
,
_in_
dygraph
_mode
,
_current_expected_place
from
.
import
unique_name
from
.param_attr
import
ParamAttr
,
WeightNormParamAttr
from
.
import
core
...
...
@@ -54,8 +54,8 @@ class LayerHelperBase(object):
Return Variable construct from value
"""
if
isinstance
(
value
,
np
.
ndarray
):
assert
_in_
imperative
_mode
(
),
"to_variable could only be called in
imperative
mode"
assert
_in_
dygraph
_mode
(
),
"to_variable could only be called in
dygraph
mode"
if
not
block
:
block
=
default_main_program
().
current_block
()
...
...
@@ -302,8 +302,8 @@ class LayerHelperBase(object):
param
=
self
.
_create_weight_normalize
(
attr
,
shape
,
dtype
)
WeightNormParamAttr
.
params_with_weight_norm
.
append
(
param
)
return
param
if
_in_
imperative
_mode
():
# In
imperative
mode, we want the returned parameter to be
if
_in_
dygraph
_mode
():
# In
dygraph
mode, we want the returned parameter to be
# initialized so that it can be used imperatively.
return
self
.
main_program
.
global_block
().
create_parameter
(
dtype
=
dtype
,
...
...
@@ -370,7 +370,7 @@ class LayerHelperBase(object):
initializer: initializer to use
"""
assert
isinstance
(
var
,
Variable
)
if
_in_
imperative
_mode
():
if
_in_
dygraph
_mode
():
initializer
(
var
,
var
.
block
)
else
:
self
.
startup_program
.
global_block
().
create_var
(
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
32146857
...
...
@@ -23,8 +23,8 @@ import os
import
inspect
from
..layer_helper
import
LayerHelper
from
..initializer
import
Normal
,
Constant
,
NumpyArrayInitializer
from
..framework
import
Variable
,
OpProtoHolder
,
_in_
imperative
_mode
from
..
imperative
import
base
from
..framework
import
Variable
,
OpProtoHolder
,
_in_
dygraph
_mode
from
..
dygraph
import
base
from
..param_attr
import
ParamAttr
from
.layer_function_generator
import
autodoc
,
templatedoc
,
_generate_doc_string_
from
.tensor
import
concat
,
assign
...
...
@@ -32,7 +32,7 @@ from . import utils
from
..
import
unique_name
from
functools
import
reduce
from
..
import
core
from
..
imperative
import
layers
from
..
dygraph
import
layers
__all__
=
[
'fc'
,
...
...
@@ -296,7 +296,6 @@ def fc(input,
data_2 = fluid.layers.data(name="data_2", shape=[24, 36], dtype="float32")
fc = fluid.layers.fc(input=[data_1, data_2], size=1000, act="tanh")
"""
helper
=
LayerHelper
(
"fc"
,
**
locals
())
dtype
=
helper
.
input_dtype
()
...
...
@@ -3279,6 +3278,8 @@ def layer_norm(input,
>>> dtype='float32')
>>> x = fluid.layers.layer_norm(input=data, begin_norm_axis=1)
"""
assert
_in_dygraph_mode
(
)
is
not
True
,
"please use FC instead of fc in dygraph mode!"
helper
=
LayerHelper
(
'layer_norm'
,
**
locals
())
dtype
=
helper
.
input_dtype
()
...
...
@@ -5866,11 +5867,49 @@ def multiplex(inputs, index):
"""
${comment}
>>> import paddle.fluid as fluid
>>> x1 = fluid.layers.data(name='x1', shape=[4], dtype='float32')
>>> x2 = fluid.layers.data(name='x2', shape=[4], dtype='float32')
>>> index = fluid.layers.data(name='index', shape=[1], dtype='int32')
>>> out = fluid.layers.multiplex(inputs=[x1, x2], index=index)
For Example:
.. code-block:: text
case 1:
Given:
X = [[[0,0,3,4], [0,1,3,4], [0,2,4,4], [0,3,3,4]],
[[1,0,3,4], [1,1,7,8], [1,2,4,2], [1,3,3,4]],
[[2,0,3,4], [2,1,7,8], [2,2,4,2], [2,3,3,4]],
[[3,0,3,4], [3,1,7,8], [3,2,4,2], [3,3,3,4]]]
index = [3,0,1,2]
out:[[3 0 3 4] // X[3,0] (3 = index[i], 0 = i); i=0
[0 1 3 4] // X[0,1] (0 = index[i], 1 = i); i=1
[1 2 4 2] // X[1,2] (0 = index[i], 2 = i); i=2
[2 3 3 4]] // X[2,3] (0 = index[i], 3 = i); i=3
case 2:
Given:
X = [[[0,0,3,4], [0,1,3,4], [0,2,4,4], [0,3,3,4]],
[[1,0,3,4], [1,1,7,8], [1,2,4,2], [1,3,3,4]]]
index = [1,0]
out:[[1 0 3 4] // X[1,0] (3 = index[0], 0 = i); i=1
[0 1 3 4] // X[0,1] (0 = index[1], 1 = i); i=2
[0 2 4 4] // X[0,2] (0 = 0, 2 = i); i=3
[0 3 3 4]] // X[0,3] (0 = 0, 3 = i); i=4
Examples:
.. code-block:: python
import paddle.fluid as fluid
x1 = fluid.layers.data(name='x1', shape=[4], dtype='float32')
x2 = fluid.layers.data(name='x2', shape=[4], dtype='float32')
index = fluid.layers.data(name='index', shape=[1], dtype='int32')
out = fluid.layers.multiplex(inputs=[x1, x2], index=index)
Args:
inputs (list): ${x_comment}.
...
...
@@ -6405,8 +6444,8 @@ def squeeze(input, axes, name=None):
x = layers.data(name='x', shape=[5, 1, 10])
y = layers.sequeeze(input=x, axes=[1])
"""
assert
not
_in_
imperative
_mode
(),
(
"squeeze layer is not supported in
imperative
mode yet."
)
assert
not
_in_
dygraph
_mode
(),
(
"squeeze layer is not supported in
dygraph
mode yet."
)
helper
=
LayerHelper
(
"squeeze"
,
**
locals
())
out
=
helper
.
create_variable_for_type_inference
(
dtype
=
input
.
dtype
)
x_shape
=
helper
.
create_variable_for_type_inference
(
dtype
=
input
.
dtype
)
...
...
@@ -9144,7 +9183,7 @@ def _elementwise_op(helper):
op_type
=
helper
.
layer_type
x
=
helper
.
kwargs
.
get
(
'x'
,
None
)
y
=
helper
.
kwargs
.
get
(
'y'
,
None
)
if
_in_
imperative
_mode
():
if
_in_
dygraph
_mode
():
x
=
base
.
to_variable
(
x
)
y
=
base
.
to_variable
(
y
)
...
...
python/paddle/fluid/layers/tensor.py
浏览文件 @
32146857
...
...
@@ -20,7 +20,6 @@ from ..framework import convert_np_dtype_to_dtype_
from
..framework
import
Variable
from
..initializer
import
Constant
,
force_init_on_cpu
from
..core
import
VarDesc
from
..imperative
import
base
as
imperative_base
from
.layer_function_generator
import
templatedoc
import
numpy
...
...
python/paddle/fluid/optimizer.py
浏览文件 @
32146857
...
...
@@ -30,7 +30,6 @@ from .initializer import Constant
from
.layer_helper
import
LayerHelper
from
.layers
import
ops
from
.regularizer
import
append_regularization_ops
from
.imperative
import
base
as
imperative_base
from
paddle.fluid
import
core
from
paddle.fluid.layers
import
tensor
from
functools
import
reduce
...
...
@@ -169,7 +168,7 @@ class Optimizer(object):
name
=
self
.
_name
+
"_"
+
name
if
(
name
in
self
.
_accumulators
and
param
.
name
in
self
.
_accumulators
[
name
]):
if
framework
.
_in_
imperative
_mode
():
if
framework
.
_in_
dygraph
_mode
():
return
self
.
_accumulators
[
name
][
param
.
name
]
raise
Exception
(
"Accumulator {} already exists for parameter {}"
.
format
(
name
,
param
.
name
))
...
...
@@ -396,11 +395,11 @@ class Optimizer(object):
"""
self
.
_dtype
=
loss
.
dtype
optimize_ops
=
[]
if
framework
.
_in_
imperative
_mode
():
if
framework
.
_in_
dygraph
_mode
():
if
parameter_list
is
not
None
:
parameters
=
parameter_list
else
:
parameters
=
framework
.
_
imperative
_tracer
().
all_parameters
()
parameters
=
framework
.
_
dygraph
_tracer
().
all_parameters
()
params_grads
=
[]
for
param
in
parameters
:
...
...
python/paddle/fluid/tests/unittests/op_test.py
浏览文件 @
32146857
...
...
@@ -262,14 +262,14 @@ class OpTest(unittest.TestCase):
if
isinstance
(
value
,
tuple
):
data
=
value
[
0
]
lod
=
value
[
1
]
v
=
fluid
.
imperative
.
base
.
to_variable
(
value
=
data
)
v
=
fluid
.
dygraph
.
base
.
to_variable
(
value
=
data
)
v
.
_ivar
.
value
().
get_tensor
().
set_recursive_sequence_lengths
(
lod
)
return
v
else
:
return
fluid
.
imperative
.
base
.
to_variable
(
value
)
return
fluid
.
dygraph
.
base
.
to_variable
(
value
)
def
_calc_
imperative
_output
(
self
,
place
,
parallel
=
False
,
no_check_set
=
None
):
with
fluid
.
imperative
.
base
.
guard
(
place
=
place
):
def
_calc_
dygraph
_output
(
self
,
place
,
parallel
=
False
,
no_check_set
=
None
):
with
fluid
.
dygraph
.
base
.
guard
(
place
=
place
):
block
=
fluid
.
default_main_program
().
global_block
()
# prepare input variable
...
...
@@ -316,7 +316,7 @@ class OpTest(unittest.TestCase):
return
outputs
def
_calc_output
(
self
,
place
,
parallel
=
False
,
no_check_set
=
None
):
def
_calc_output
(
self
,
place
,
parallel
=
False
,
no_check_set
=
None
,
loss
=
None
):
program
=
Program
()
block
=
program
.
global_block
()
self
.
_append_ops
(
block
)
...
...
@@ -329,8 +329,14 @@ class OpTest(unittest.TestCase):
use_cuda
=
False
if
isinstance
(
place
,
fluid
.
CUDAPlace
(
0
)):
use_cuda
=
True
if
loss
:
executor
=
fluid
.
ParallelExecutor
(
use_cuda
=
use_cuda
,
loss_name
=
loss
.
name
,
main_program
=
program
)
use_cuda
=
use_cuda
,
loss_name
=
loss
.
name
,
main_program
=
program
)
else
:
executor
=
fluid
.
ParallelExecutor
(
use_cuda
=
use_cuda
,
main_program
=
program
)
else
:
executor
=
Executor
(
place
)
...
...
@@ -364,9 +370,9 @@ class OpTest(unittest.TestCase):
atol
,
no_check_set
=
None
,
equal_nan
=
False
,
check_
imperative
=
False
):
if
check_
imperative
:
imperative_outs
=
self
.
_calc_imperative
_output
(
check_
dygraph
=
False
):
if
check_
dygraph
:
dygraph_outs
=
self
.
_calc_dygraph
_output
(
place
,
no_check_set
=
no_check_set
)
outs
,
fetch_list
=
self
.
_calc_output
(
place
,
no_check_set
=
no_check_set
)
...
...
@@ -393,8 +399,8 @@ class OpTest(unittest.TestCase):
type
(
sub_out
))
for
item
in
sub_out
:
sub_out_name
,
expect
=
item
[
0
],
item
[
1
]
if
check_
imperative
:
imperative_actual
=
imperative
_outs
[
sub_out_name
][
0
]
if
check_
dygraph
:
imperative_actual
=
dygraph
_outs
[
sub_out_name
][
0
]
imperative_actual_t
=
np
.
array
(
imperative_actual
.
_ivar
.
value
().
get_tensor
())
idx
=
find_actual
(
sub_out_name
,
fetch_list
)
...
...
@@ -407,7 +413,7 @@ class OpTest(unittest.TestCase):
actual_t
,
expect_t
,
atol
=
atol
,
equal_nan
=
equal_nan
),
"Output ("
+
sub_out_name
+
") has diff at "
+
str
(
place
))
if
check_
imperative
:
if
check_
dygraph
:
self
.
assertTrue
(
np
.
allclose
(
imperative_actual_t
,
...
...
@@ -415,21 +421,21 @@ class OpTest(unittest.TestCase):
atol
=
atol
,
equal_nan
=
equal_nan
),
"Output ("
+
sub_out_name
+
") has diff at "
+
str
(
place
)
+
" in
imperative
mode"
)
str
(
place
)
+
" in
dygraph
mode"
)
if
isinstance
(
expect
,
tuple
):
self
.
assertListEqual
(
actual
.
recursive_sequence_lengths
(),
expect
[
1
],
"Output ("
+
sub_out_name
+
") has different lod at "
+
str
(
place
))
if
check_
imperative
:
if
check_
dygraph
:
self
.
assertListEqual
(
imperative_actual
.
_ivar
.
value
().
get_tensor
()
.
recursive_sequence_lengths
(),
expect
[
1
],
"Output ("
+
out_name
+
") has different lod at "
+
str
(
place
)
+
" in
imperative
mode"
)
str
(
place
)
+
" in
dygraph
mode"
)
else
:
if
check_
imperative
:
imperative_actual
=
imperative
_outs
[
out_name
][
0
]
if
check_
dygraph
:
imperative_actual
=
dygraph
_outs
[
out_name
][
0
]
imperative_actual_t
=
np
.
array
(
imperative_actual
.
_ivar
.
value
().
get_tensor
())
idx
=
find_actual
(
out_name
,
fetch_list
)
...
...
@@ -443,7 +449,7 @@ class OpTest(unittest.TestCase):
"Output ("
+
out_name
+
") has diff at "
+
str
(
place
)
+
"
\n
Expect "
+
str
(
expect_t
)
+
"
\n
"
+
"But Got"
+
str
(
actual_t
)
+
" in class "
+
self
.
__class__
.
__name__
)
if
check_
imperative
:
if
check_
dygraph
:
self
.
assertTrue
(
np
.
allclose
(
imperative_actual_t
,
...
...
@@ -458,12 +464,12 @@ class OpTest(unittest.TestCase):
self
.
assertListEqual
(
actual
.
recursive_sequence_lengths
(),
expect
[
1
],
"Output ("
+
out_name
+
") has different lod at "
+
str
(
place
))
if
check_
imperative
:
if
check_
dygraph
:
self
.
assertListEqual
(
imperative_actual
.
_ivar
.
value
().
get_tensor
()
.
recursive_sequence_lengths
(),
expect
[
1
],
"Output ("
+
out_name
+
") has different lod at "
+
str
(
place
)
+
" in
imperative
mode"
)
str
(
place
)
+
" in
dygraph
mode"
)
def
_get_places
(
self
):
if
self
.
dtype
==
np
.
float16
:
...
...
@@ -490,11 +496,11 @@ class OpTest(unittest.TestCase):
atol
=
1e-5
,
no_check_set
=
None
,
equal_nan
=
False
,
check_
imperative
=
False
):
check_
dygraph
=
False
):
places
=
self
.
_get_places
()
for
place
in
places
:
self
.
check_output_with_place
(
place
,
atol
,
no_check_set
,
equal_nan
,
check_
imperative
)
check_
dygraph
)
def
check_output_customized
(
self
,
checker
):
places
=
self
.
_get_places
()
...
...
python/paddle/fluid/tests/unittests/parallel_executor_test_base.py
浏览文件 @
32146857
...
...
@@ -43,6 +43,7 @@ class TestParallelExecutorBase(unittest.TestCase):
use_ir_memory_optimize
=
True
,
enable_inplace
=
True
,
fuse_elewise_add_act_ops
=
False
,
fuse_all_optimizer_ops
=
False
,
fuse_all_reduce_ops
=
False
,
fuse_relu_depthwise_conv
=
False
,
optimizer
=
fluid
.
optimizer
.
Adam
,
...
...
@@ -81,6 +82,7 @@ class TestParallelExecutorBase(unittest.TestCase):
build_strategy
.
fuse_elewise_add_act_ops
=
fuse_elewise_add_act_ops
build_strategy
.
fuse_relu_depthwise_conv
=
fuse_relu_depthwise_conv
build_strategy
.
memory_optimize
=
False
if
memory_opt
else
use_ir_memory_optimize
build_strategy
.
fuse_all_optimizer_ops
=
fuse_all_optimizer_ops
build_strategy
.
fuse_all_reduce_ops
=
fuse_all_reduce_ops
# python memory optimization is conflict with inplace pass.
# Use ir graph memory optimization after inplace pass is the correct way.
...
...
python/paddle/fluid/tests/unittests/test_alloc_continuous_space_op.py
浏览文件 @
32146857
...
...
@@ -16,8 +16,10 @@ from __future__ import print_function
import
unittest
import
numpy
as
np
from
op_test
import
OpTest
from
paddle.fluid
import
core
alignment
=
256
class
TestAllocContinuousSpace
(
OpTest
):
...
...
@@ -29,11 +31,11 @@ class TestAllocContinuousSpace(OpTest):
self
.
constant
=
attrs
[
"constant"
]
self
.
set_constant
=
attrs
[
"set_constant"
]
self
.
Inputs
=
self
.
init_input
()
self
.
FusedOutput
=
self
.
init_output
(
self
.
Inputs
,
self
.
set_constant
,
self
.
constant
)
self
.
Outputs
,
self
.
FusedOutput
=
self
.
init_output
(
self
.
Inputs
,
self
.
set_constant
,
self
.
constant
)
self
.
inputs
=
{
'Input'
:
self
.
Inputs
}
self
.
attrs
=
attrs
self
.
outputs
=
{
'Output'
:
self
.
In
puts
,
'FusedOutput'
:
self
.
FusedOutput
}
self
.
outputs
=
{
'Output'
:
self
.
Out
puts
,
'FusedOutput'
:
self
.
FusedOutput
}
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float32
...
...
@@ -52,14 +54,31 @@ class TestAllocContinuousSpace(OpTest):
return
{
"copy_data"
:
True
,
"set_constant"
:
False
,
"constant"
:
0.0
}
def
init_output
(
self
,
input_list
,
set_constant
,
constant
):
inputs
=
[
input
[
1
].
flatten
()
for
input
in
input_list
]
output
=
np
.
concatenate
(
inputs
)
inputs
=
[]
outputs
=
input_list
for
input
in
input_list
:
length
=
len
(
input
[
1
].
flatten
())
aligned_len
=
(
length
+
alignment
)
/
alignment
*
alignment
out
=
np
.
zeros
(
int
(
aligned_len
))
out
[
0
:
length
]
=
input
[
1
].
flatten
()
inputs
.
append
(
out
)
alloc_continuous_space_var
=
np
.
concatenate
([
input
for
input
in
inputs
])
if
set_constant
:
output
=
np
.
ones
((
len
(
output
)))
*
constant
return
output
alloc_continuous_space_var
=
np
.
ones
(
(
len
(
alloc_continuous_space_var
)))
*
constant
outputs
=
[(
out
[
0
],
np
.
ones
(
out
[
1
].
shape
).
astype
(
self
.
dtype
)
*
constant
)
for
out
in
outputs
]
return
outputs
,
alloc_continuous_space_var
def
test_check_output
(
self
):
self
.
check_output
()
if
core
.
is_compiled_with_cuda
():
self
.
check_output_with_place
(
place
=
core
.
CUDAPlace
(
0
),
no_check_set
=
[
"FusedOutput"
],
atol
=
1e-5
)
class
TestAllocContinuousSpace2
(
TestAllocContinuousSpace
):
...
...
@@ -67,7 +86,11 @@ class TestAllocContinuousSpace2(TestAllocContinuousSpace):
return
{
"copy_data"
:
False
,
"set_constant"
:
True
,
"constant"
:
0.5
}
def
test_check_output
(
self
):
self
.
check_output
(
no_check_set
=
[
"Output"
])
if
core
.
is_compiled_with_cuda
():
self
.
check_output_with_place
(
place
=
core
.
CUDAPlace
(
0
),
no_check_set
=
[
"FusedOutput"
],
atol
=
1e-5
)
if
__name__
==
'__main__'
:
...
...
python/paddle/fluid/tests/unittests/test_base_layer.py
浏览文件 @
32146857
...
...
@@ -18,7 +18,7 @@ import numpy as np
import
paddle.fluid
as
fluid
class
L1
(
fluid
.
imperative
.
Layer
):
class
L1
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
prefix
):
super
(
L1
,
self
).
__init__
(
prefix
)
self
.
_param_attr
=
fluid
.
ParamAttr
(
...
...
@@ -32,7 +32,7 @@ class L1(fluid.imperative.Layer):
return
self
.
w1
+
self
.
w2
class
L2
(
fluid
.
imperative
.
Layer
):
class
L2
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
prefix
):
super
(
L2
,
self
).
__init__
(
prefix
)
self
.
layer1
=
L1
(
self
.
full_name
())
...
...
@@ -42,7 +42,7 @@ class L2(fluid.imperative.Layer):
return
self
.
layer1
()
+
self
.
layer2
()
class
L3
(
fluid
.
imperative
.
Layer
):
class
L3
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
prefix
):
super
(
L3
,
self
).
__init__
(
prefix
)
self
.
layer1
=
L2
(
self
.
full_name
())
...
...
@@ -54,7 +54,7 @@ class L3(fluid.imperative.Layer):
class
TestBaseLayer
(
unittest
.
TestCase
):
def
test_one_level
(
self
):
with
fluid
.
imperative
.
guard
():
with
fluid
.
dygraph
.
guard
():
l
=
L1
(
'test_one_level'
)
ret
=
l
()
self
.
assertEqual
(
l
.
w1
.
name
,
"test_one_level/L1_0.w_0"
)
...
...
@@ -62,7 +62,7 @@ class TestBaseLayer(unittest.TestCase):
self
.
assertTrue
(
np
.
allclose
(
ret
.
_numpy
(),
0.2
*
np
.
ones
([
2
,
2
])))
def
test_three_level
(
self
):
with
fluid
.
imperative
.
guard
():
with
fluid
.
dygraph
.
guard
():
l
=
L3
(
'test_three_level'
)
names
=
[
p
.
name
for
p
in
l
.
parameters
()]
ret
=
l
()
...
...
python/paddle/fluid/tests/unittests/test_fuse_optimizer_pass.py
0 → 100644
浏览文件 @
32146857
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
parallel_executor_test_base
import
TestParallelExecutorBase
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
import
numpy
as
np
import
paddle
import
paddle.dataset.mnist
as
mnist
import
unittest
import
os
def
simple_fc_net
(
use_feed
):
img
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
[
784
],
dtype
=
'float32'
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
hidden
=
img
for
_
in
range
(
4
):
hidden
=
fluid
.
layers
.
fc
(
hidden
,
size
=
200
,
act
=
'relu'
,
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
1.0
)))
prediction
=
fluid
.
layers
.
fc
(
hidden
,
size
=
10
,
act
=
'softmax'
)
loss
=
fluid
.
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
loss
=
fluid
.
layers
.
mean
(
loss
)
return
loss
def
fc_with_batchnorm
(
use_feed
):
img
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
[
784
],
dtype
=
'float32'
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
hidden
=
img
for
_
in
range
(
2
):
hidden
=
fluid
.
layers
.
fc
(
hidden
,
size
=
200
,
act
=
'relu'
,
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
1.0
)))
hidden
=
fluid
.
layers
.
batch_norm
(
input
=
hidden
)
prediction
=
fluid
.
layers
.
fc
(
hidden
,
size
=
10
,
act
=
'softmax'
)
loss
=
fluid
.
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
loss
=
fluid
.
layers
.
mean
(
loss
)
return
loss
class
TestFuseAdamOps
(
TestParallelExecutorBase
):
@
classmethod
def
setUpClass
(
cls
):
os
.
environ
[
'CPU_NUM'
]
=
str
(
4
)
def
_init_data
(
self
,
random
=
True
):
np
.
random
.
seed
(
5
)
if
random
:
img
=
np
.
random
.
random
(
size
=
[
32
,
784
]).
astype
(
np
.
float32
)
else
:
img
=
np
.
ones
(
shape
=
[
32
,
784
],
dtype
=
'float32'
)
label
=
np
.
ones
(
shape
=
[
32
,
1
],
dtype
=
'int64'
)
return
img
,
label
def
_compare_fused_optimizer_ops
(
self
,
model
,
use_cuda
,
random_data
=
True
,
optimizer
=
fluid
.
optimizer
.
Adam
):
if
use_cuda
and
not
core
.
is_compiled_with_cuda
():
return
img
,
label
=
self
.
_init_data
(
random_data
)
not_fuse_op_first_loss
,
not_fuse_op_last_loss
=
self
.
check_network_convergence
(
model
,
feed_dict
=
{
"image"
:
img
,
"label"
:
label
},
use_cuda
=
use_cuda
,
fuse_all_optimizer_ops
=
False
,
memory_opt
=
False
,
# avoid the gradient's name changed in Python side.
optimizer
=
optimizer
)
fuse_op_first_loss
,
fuse_op_last_loss
=
self
.
check_network_convergence
(
model
,
feed_dict
=
{
"image"
:
img
,
"label"
:
label
},
use_cuda
=
use_cuda
,
fuse_all_optimizer_ops
=
True
,
memory_opt
=
False
,
# avoid the gradient's name changed in Python side.
optimizer
=
optimizer
)
for
loss
in
zip
(
not_fuse_op_first_loss
,
fuse_op_first_loss
):
self
.
assertAlmostEquals
(
loss
[
0
],
loss
[
1
],
delta
=
1e-6
)
for
loss
in
zip
(
not_fuse_op_last_loss
,
fuse_op_last_loss
):
self
.
assertAlmostEquals
(
loss
[
0
],
loss
[
1
],
delta
=
1e-6
)
def
test_simple_fc_with_fuse_op
(
self
):
self
.
_compare_fused_optimizer_ops
(
simple_fc_net
,
True
)
self
.
_compare_fused_optimizer_ops
(
simple_fc_net
,
False
)
def
test_batchnorm_fc_with_fuse_op
(
self
):
self
.
_compare_fused_optimizer_ops
(
fc_with_batchnorm
,
True
)
# self._compare_fused_optimizer_ops(fc_with_batchnorm, False)
class
TestFuseSGDOps
(
TestFuseAdamOps
):
def
sgd_optimizer
(
self
,
learning_rate
=
1e-4
):
return
fluid
.
optimizer
.
SGD
(
learning_rate
=
learning_rate
)
def
test_simple_fc_with_fuse_op
(
self
):
self
.
_compare_fused_optimizer_ops
(
simple_fc_net
,
True
,
optimizer
=
self
.
sgd_optimizer
)
self
.
_compare_fused_optimizer_ops
(
simple_fc_net
,
False
,
optimizer
=
self
.
sgd_optimizer
)
def
test_batchnorm_fc_with_fuse_op
(
self
):
self
.
_compare_fused_optimizer_ops
(
fc_with_batchnorm
,
True
,
optimizer
=
self
.
sgd_optimizer
)
self
.
_compare_fused_optimizer_ops
(
fc_with_batchnorm
,
False
,
optimizer
=
self
.
sgd_optimizer
)
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_gru_op.py
浏览文件 @
32146857
...
...
@@ -156,7 +156,7 @@ class TestGRUOp(OpTest):
}
def
test_check_output
(
self
):
self
.
check_output
(
atol
=
1e-8
,
check_
imperative
=
True
)
self
.
check_output
(
atol
=
1e-8
,
check_
dygraph
=
True
)
def
test_check_grad
(
self
):
self
.
check_grad
([
'Input'
,
'H0'
,
'Weight'
,
'Bias'
],
[
'Hidden'
])
...
...
python/paddle/fluid/tests/unittests/test_imperative_basic.py
浏览文件 @
32146857
...
...
@@ -18,11 +18,11 @@ import numpy as np
import
paddle.fluid
as
fluid
from
paddle.fluid
import
core
from
paddle.fluid.
imperative
.nn
import
FC
from
paddle.fluid.
dygraph
.nn
import
FC
from
test_imperative_base
import
new_program_scope
class
MyLayer
(
fluid
.
imperative
.
Layer
):
class
MyLayer
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
name_scope
):
super
(
MyLayer
,
self
).
__init__
(
name_scope
)
...
...
@@ -34,7 +34,7 @@ class MyLayer(fluid.imperative.Layer):
return
[
x
]
class
MyPyLayer
(
fluid
.
imperative
.
PyLayer
):
class
MyPyLayer
(
fluid
.
dygraph
.
PyLayer
):
def
__init__
(
self
):
super
(
MyPyLayer
,
self
).
__init__
()
...
...
@@ -48,7 +48,7 @@ class MyPyLayer(fluid.imperative.PyLayer):
return
np
.
array
(
dout
)
*
(
1
-
np
.
square
(
np
.
array
(
out
)))
class
MLP
(
fluid
.
imperative
.
Layer
):
class
MLP
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
name_scope
):
super
(
MLP
,
self
).
__init__
(
name_scope
)
self
.
_fc1
=
FC
(
self
.
full_name
(),
...
...
@@ -71,7 +71,7 @@ class MLP(fluid.imperative.Layer):
return
x
class
SimpleRNNCell
(
fluid
.
imperative
.
Layer
):
class
SimpleRNNCell
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
name_scope
,
step_input_size
,
hidden_size
,
output_size
,
param_attr
):
super
(
SimpleRNNCell
,
self
).
__init__
(
name_scope
)
...
...
@@ -159,7 +159,7 @@ class SimpleRNNCell(fluid.imperative.Layer):
return
reduce_out
,
hidden
class
SimpleRNN
(
fluid
.
imperative
.
Layer
):
class
SimpleRNN
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
name_scope
):
super
(
SimpleRNN
,
self
).
__init__
(
name_scope
)
self
.
seq_len
=
4
...
...
@@ -194,10 +194,10 @@ class SimpleRNN(fluid.imperative.Layer):
class
TestImperative
(
unittest
.
TestCase
):
def
test_sum_op
(
self
):
x
=
np
.
ones
([
2
,
2
],
np
.
float32
)
with
fluid
.
imperative
.
guard
():
with
fluid
.
dygraph
.
guard
():
inputs
=
[]
for
_
in
range
(
10
):
inputs
.
append
(
fluid
.
imperative
.
base
.
to_variable
(
x
))
inputs
.
append
(
fluid
.
dygraph
.
base
.
to_variable
(
x
))
ret
=
fluid
.
layers
.
sums
(
inputs
)
loss
=
fluid
.
layers
.
reduce_sum
(
ret
)
loss
.
_backward
()
...
...
@@ -205,17 +205,17 @@ class TestImperative(unittest.TestCase):
self
.
assertTrue
(
np
.
allclose
(
inputs
[
0
].
_gradient
(),
x
))
def
test_layer
(
self
):
with
fluid
.
imperative
.
guard
():
with
fluid
.
dygraph
.
guard
():
cl
=
core
.
Layer
()
cl
.
forward
([])
l
=
fluid
.
imperative
.
Layer
(
"l"
)
l
=
fluid
.
dygraph
.
Layer
(
"l"
)
self
.
assertRaises
(
NotImplementedError
,
l
.
forward
,
[])
def
test_pylayer_func_id
(
self
):
with
fluid
.
imperative
.
guard
():
with
fluid
.
dygraph
.
guard
():
class
PyLayer1
(
fluid
.
imperative
.
PyLayer
):
class
PyLayer1
(
fluid
.
dygraph
.
PyLayer
):
def
__init__
(
self
):
super
(
PyLayer1
,
self
).
__init__
()
...
...
@@ -227,7 +227,7 @@ class TestImperative(unittest.TestCase):
def
backward
(
input
):
return
input
class
PyLayer2
(
fluid
.
imperative
.
PyLayer
):
class
PyLayer2
(
fluid
.
dygraph
.
PyLayer
):
def
__init__
(
self
):
super
(
PyLayer2
,
self
).
__init__
()
...
...
@@ -241,21 +241,21 @@ class TestImperative(unittest.TestCase):
py_layer_1
=
PyLayer1
()
py_layer_2
=
PyLayer2
()
py_layer_1
(
fluid
.
imperative
.
base
.
to_variable
(
np
.
ones
([
2
,
2
])))
py_layer_2
(
fluid
.
imperative
.
base
.
to_variable
(
np
.
ones
([
2
,
2
])))
py_layer_1
(
fluid
.
dygraph
.
base
.
to_variable
(
np
.
ones
([
2
,
2
])))
py_layer_2
(
fluid
.
dygraph
.
base
.
to_variable
(
np
.
ones
([
2
,
2
])))
id
=
py_layer_1
.
forward_id
self
.
assertGreater
(
id
,
0
)
self
.
assertEqual
(
py_layer_1
.
backward_id
,
id
+
1
)
self
.
assertEqual
(
py_layer_2
.
forward_id
,
id
+
2
)
self
.
assertEqual
(
py_layer_2
.
backward_id
,
id
+
3
)
py_layer_1
(
fluid
.
imperative
.
base
.
to_variable
(
np
.
ones
([
2
,
2
])))
py_layer_1
(
fluid
.
dygraph
.
base
.
to_variable
(
np
.
ones
([
2
,
2
])))
self
.
assertEqual
(
py_layer_1
.
forward_id
,
id
)
def
test_pylayer
(
self
):
np_inp
=
np
.
ones
([
2
,
2
],
np
.
float32
)
with
fluid
.
imperative
.
guard
():
with
fluid
.
dygraph
.
guard
():
my_py_layer
=
MyPyLayer
()
var_inp
=
fluid
.
imperative
.
base
.
to_variable
(
np_inp
)
var_inp
=
fluid
.
dygraph
.
base
.
to_variable
(
np_inp
)
outs
=
my_py_layer
(
var_inp
)
dy_out
=
np
.
sum
(
outs
[
0
].
_numpy
())
outs
[
0
].
_backward
()
...
...
@@ -282,8 +282,8 @@ class TestImperative(unittest.TestCase):
def
test_layer_in_out
(
self
):
np_inp
=
np
.
array
([
1.0
,
2.0
,
-
1.0
],
dtype
=
np
.
float32
)
with
fluid
.
imperative
.
guard
():
var_inp
=
fluid
.
imperative
.
base
.
to_variable
(
np_inp
)
with
fluid
.
dygraph
.
guard
():
var_inp
=
fluid
.
dygraph
.
base
.
to_variable
(
np_inp
)
l
=
MyLayer
(
"my_layer"
)
x
=
l
(
var_inp
)[
0
]
self
.
assertIsNotNone
(
x
)
...
...
@@ -310,8 +310,8 @@ class TestImperative(unittest.TestCase):
def
test_mlp
(
self
):
np_inp
=
np
.
array
([[
1.0
,
2.0
],
[
3.0
,
4.0
]],
dtype
=
np
.
float32
)
with
fluid
.
imperative
.
guard
():
var_inp
=
fluid
.
imperative
.
base
.
to_variable
(
np_inp
)
with
fluid
.
dygraph
.
guard
():
var_inp
=
fluid
.
dygraph
.
base
.
to_variable
(
np_inp
)
mlp
=
MLP
(
"mlp"
)
out
=
mlp
(
var_inp
)
dy_out
=
out
.
_numpy
()
...
...
@@ -353,8 +353,8 @@ class TestImperative(unittest.TestCase):
[
10.0
,
11.0
,
12.0
]])
np_inp
=
np_inp
.
reshape
((
1
,
4
,
3
))
np_inp
=
np_inp
.
astype
(
np
.
float32
)
with
fluid
.
imperative
.
guard
():
var_inp
=
fluid
.
imperative
.
base
.
to_variable
(
np_inp
)
with
fluid
.
dygraph
.
guard
():
var_inp
=
fluid
.
dygraph
.
base
.
to_variable
(
np_inp
)
var_inp
=
fluid
.
layers
.
reshape
(
var_inp
,
shape
=
[
1
,
4
,
3
])
simple_rnn
=
SimpleRNN
(
"simple_rnn"
)
outs
,
pre_hiddens
=
simple_rnn
.
forward
(
var_inp
)
...
...
python/paddle/fluid/tests/unittests/test_imperative_checkpoint.py
浏览文件 @
32146857
...
...
@@ -18,11 +18,11 @@ import numpy as np
import
paddle
import
paddle.fluid
as
fluid
from
paddle.fluid.optimizer
import
SGDOptimizer
from
paddle.fluid.
imperative
.nn
import
Conv2D
,
Pool2D
,
FC
from
paddle.fluid.
imperative
.base
import
to_variable
from
paddle.fluid.
dygraph
.nn
import
Conv2D
,
Pool2D
,
FC
from
paddle.fluid.
dygraph
.base
import
to_variable
class
SimpleImgConvPool
(
fluid
.
imperative
.
Layer
):
class
SimpleImgConvPool
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
name_scope
,
num_channels
,
...
...
@@ -71,7 +71,7 @@ class SimpleImgConvPool(fluid.imperative.Layer):
return
x
class
MNIST
(
fluid
.
imperative
.
Layer
):
class
MNIST
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
name_scope
):
super
(
MNIST
,
self
).
__init__
(
name_scope
)
...
...
@@ -98,12 +98,12 @@ class MNIST(fluid.imperative.Layer):
return
x
class
Test
Imperative
Checkpoint
(
unittest
.
TestCase
):
class
Test
Dygraph
Checkpoint
(
unittest
.
TestCase
):
def
save_load_persistables
(
self
):
seed
=
90
epoch_num
=
1
with
fluid
.
imperative
.
guard
():
with
fluid
.
dygraph
.
guard
():
fluid
.
default_startup_program
().
random_seed
=
seed
fluid
.
default_main_program
().
random_seed
=
seed
...
...
@@ -135,14 +135,14 @@ class TestImperativeCheckpoint(unittest.TestCase):
avg_loss
.
_backward
()
sgd
.
minimize
(
avg_loss
)
fluid
.
imperative
.
save_persistables
(
mnist
,
"save_dir"
)
fluid
.
dygraph
.
save_persistables
(
mnist
,
"save_dir"
)
mnist
.
clear_gradients
()
for
param
in
mnist
.
parameters
():
dy_param_init_value
[
param
.
name
]
=
param
.
_numpy
()
mnist
.
load_dict
(
fluid
.
imperative
.
load_persistables
(
mnist
,
"save_dir"
))
fluid
.
dygraph
.
load_persistables
(
mnist
,
"save_dir"
))
restore
=
mnist
.
parameters
()
...
...
python/paddle/fluid/tests/unittests/test_imperative_deepcf.py
浏览文件 @
32146857
...
...
@@ -22,7 +22,7 @@ import paddle
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
from
test_imperative_base
import
new_program_scope
from
paddle.fluid.
imperative
.base
import
to_variable
from
paddle.fluid.
dygraph
.base
import
to_variable
# Can use Amusic dataset as the DeepCF describes.
DATA_PATH
=
os
.
environ
.
get
(
'DATA_PATH'
,
''
)
...
...
@@ -32,11 +32,11 @@ NUM_BATCHES = int(os.environ.get('NUM_BATCHES', 5))
NUM_EPOCHES
=
int
(
os
.
environ
.
get
(
'NUM_EPOCHES'
,
1
))
class
DMF
(
fluid
.
imperative
.
Layer
):
class
DMF
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
name_scope
):
super
(
DMF
,
self
).
__init__
(
name_scope
)
self
.
_user_latent
=
fluid
.
imperative
.
FC
(
self
.
full_name
(),
256
)
self
.
_item_latent
=
fluid
.
imperative
.
FC
(
self
.
full_name
(),
256
)
self
.
_user_latent
=
fluid
.
dygraph
.
FC
(
self
.
full_name
(),
256
)
self
.
_item_latent
=
fluid
.
dygraph
.
FC
(
self
.
full_name
(),
256
)
self
.
_user_layers
=
[]
self
.
_item_layers
=
[]
...
...
@@ -45,12 +45,12 @@ class DMF(fluid.imperative.Layer):
self
.
_user_layers
.
append
(
self
.
add_sublayer
(
'user_layer_%d'
%
i
,
fluid
.
imperative
.
FC
(
fluid
.
dygraph
.
FC
(
self
.
full_name
(),
self
.
_hid_sizes
[
i
],
act
=
'relu'
)))
self
.
_item_layers
.
append
(
self
.
add_sublayer
(
'item_layer_%d'
%
i
,
fluid
.
imperative
.
FC
(
fluid
.
dygraph
.
FC
(
self
.
full_name
(),
self
.
_hid_sizes
[
i
],
act
=
'relu'
)))
def
forward
(
self
,
users
,
items
):
...
...
@@ -63,18 +63,18 @@ class DMF(fluid.imperative.Layer):
return
fluid
.
layers
.
elementwise_mul
(
users
,
items
)
class
MLP
(
fluid
.
imperative
.
Layer
):
class
MLP
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
name_scope
):
super
(
MLP
,
self
).
__init__
(
name_scope
)
self
.
_user_latent
=
fluid
.
imperative
.
FC
(
self
.
full_name
(),
256
)
self
.
_item_latent
=
fluid
.
imperative
.
FC
(
self
.
full_name
(),
256
)
self
.
_user_latent
=
fluid
.
dygraph
.
FC
(
self
.
full_name
(),
256
)
self
.
_item_latent
=
fluid
.
dygraph
.
FC
(
self
.
full_name
(),
256
)
self
.
_match_layers
=
[]
self
.
_hid_sizes
=
[
128
,
64
]
for
i
in
range
(
len
(
self
.
_hid_sizes
)):
self
.
_match_layers
.
append
(
self
.
add_sublayer
(
'match_layer_%d'
%
i
,
fluid
.
imperative
.
FC
(
fluid
.
dygraph
.
FC
(
self
.
full_name
(),
self
.
_hid_sizes
[
i
],
act
=
'relu'
)))
self
.
_mat
...
...
@@ -88,7 +88,7 @@ class MLP(fluid.imperative.Layer):
return
match_vec
class
DeepCF
(
fluid
.
imperative
.
Layer
):
class
DeepCF
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
name_scope
,
num_users
,
num_items
,
matrix
):
super
(
DeepCF
,
self
).
__init__
(
name_scope
)
self
.
_num_users
=
num_users
...
...
@@ -103,7 +103,7 @@ class DeepCF(fluid.imperative.Layer):
self
.
_mlp
=
MLP
(
self
.
full_name
())
self
.
_dmf
=
DMF
(
self
.
full_name
())
self
.
_match_fc
=
fluid
.
imperative
.
FC
(
self
.
full_name
(),
1
,
act
=
'sigmoid'
)
self
.
_match_fc
=
fluid
.
dygraph
.
FC
(
self
.
full_name
(),
1
,
act
=
'sigmoid'
)
def
forward
(
self
,
users
,
items
):
# users_emb = self._user_emb(users)
...
...
@@ -191,7 +191,7 @@ def load_data(DATA_PATH):
np
.
expand_dims
(
labels_np
,
-
1
),
num_users
,
num_items
,
matrix
class
Test
Imperative
DeepCF
(
unittest
.
TestCase
):
class
Test
Dygraph
DeepCF
(
unittest
.
TestCase
):
def
test_deefcf
(
self
):
seed
=
90
if
DATA_PATH
:
...
...
@@ -237,7 +237,7 @@ class TestImperativeDeepCF(unittest.TestCase):
fetch_list
=
[
loss
])[
0
]
sys
.
stderr
.
write
(
'static loss %s
\n
'
%
static_loss
)
with
fluid
.
imperative
.
guard
():
with
fluid
.
dygraph
.
guard
():
fluid
.
default_startup_program
().
random_seed
=
seed
fluid
.
default_main_program
().
random_seed
=
seed
...
...
python/paddle/fluid/tests/unittests/test_imperative_gan.py
浏览文件 @
32146857
...
...
@@ -22,12 +22,12 @@ import paddle
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
from
paddle.fluid.optimizer
import
SGDOptimizer
from
paddle.fluid.
imperative
.nn
import
Conv2D
,
Pool2D
,
FC
from
paddle.fluid.
dygraph
.nn
import
Conv2D
,
Pool2D
,
FC
from
test_imperative_base
import
new_program_scope
from
paddle.fluid.
imperative
.base
import
to_variable
from
paddle.fluid.
dygraph
.base
import
to_variable
class
Discriminator
(
fluid
.
imperative
.
Layer
):
class
Discriminator
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
name_scope
):
super
(
Discriminator
,
self
).
__init__
(
name_scope
)
self
.
_fc1
=
FC
(
self
.
full_name
(),
size
=
32
,
act
=
'elu'
)
...
...
@@ -38,7 +38,7 @@ class Discriminator(fluid.imperative.Layer):
return
self
.
_fc2
(
x
)
class
Generator
(
fluid
.
imperative
.
Layer
):
class
Generator
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
name_scope
):
super
(
Generator
,
self
).
__init__
(
name_scope
)
self
.
_fc1
=
FC
(
self
.
full_name
(),
size
=
64
,
act
=
'elu'
)
...
...
@@ -51,7 +51,7 @@ class Generator(fluid.imperative.Layer):
return
self
.
_fc3
(
x
)
class
Test
Imperative
GAN
(
unittest
.
TestCase
):
class
Test
Dygraph
GAN
(
unittest
.
TestCase
):
def
test_gan_float32
(
self
):
seed
=
90
...
...
@@ -130,7 +130,7 @@ class TestImperativeGAN(unittest.TestCase):
scope
.
find_var
(
param
.
name
).
get_tensor
())
dy_params
=
dict
()
with
fluid
.
imperative
.
guard
():
with
fluid
.
dygraph
.
guard
():
fluid
.
default_startup_program
().
random_seed
=
seed
fluid
.
default_main_program
().
random_seed
=
seed
...
...
python/paddle/fluid/tests/unittests/test_imperative_gnn.py
浏览文件 @
32146857
...
...
@@ -22,16 +22,16 @@ import paddle
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
from
paddle.fluid.optimizer
import
AdamOptimizer
from
paddle.fluid.
imperative
.nn
import
Conv2D
,
Pool2D
,
FC
from
paddle.fluid.
dygraph
.nn
import
Conv2D
,
Pool2D
,
FC
from
test_imperative_base
import
new_program_scope
from
paddle.fluid.
imperative
.base
import
to_variable
from
paddle.fluid.
dygraph
.base
import
to_variable
def
gen_data
():
pass
class
GraphConv
(
fluid
.
imperative
.
Layer
):
class
GraphConv
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
name_scope
,
in_features
,
out_features
):
super
(
GraphConv
,
self
).
__init__
(
name_scope
)
...
...
@@ -50,7 +50,7 @@ class GraphConv(fluid.imperative.Layer):
return
fluid
.
layers
.
matmul
(
adj
,
support
)
+
self
.
bias
class
GCN
(
fluid
.
imperative
.
Layer
):
class
GCN
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
name_scope
,
num_hidden
):
super
(
GCN
,
self
).
__init__
(
name_scope
)
self
.
gc
=
GraphConv
(
self
.
full_name
(),
num_hidden
,
32
)
...
...
@@ -61,7 +61,7 @@ class GCN(fluid.imperative.Layer):
return
self
.
gc2
(
x
,
adj
)
class
Test
Imperative
GNN
(
unittest
.
TestCase
):
class
Test
Dygraph
GNN
(
unittest
.
TestCase
):
def
test_gnn_float32
(
self
):
seed
=
90
...
...
@@ -115,7 +115,7 @@ class TestImperativeGNN(unittest.TestCase):
static_weight
=
np
.
array
(
scope
.
find_var
(
model
.
gc
.
weight
.
name
).
get_tensor
())
with
fluid
.
imperative
.
guard
():
with
fluid
.
dygraph
.
guard
():
fluid
.
default_startup_program
().
random_seed
=
seed
fluid
.
default_main_program
().
random_seed
=
seed
...
...
python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
浏览文件 @
32146857
...
...
@@ -23,12 +23,12 @@ import paddle
import
paddle.fluid
as
fluid
from
paddle.fluid
import
core
from
paddle.fluid.optimizer
import
SGDOptimizer
from
paddle.fluid.
imperative
.nn
import
Conv2D
,
Pool2D
,
FC
from
paddle.fluid.
imperative
.base
import
to_variable
from
paddle.fluid.
dygraph
.nn
import
Conv2D
,
Pool2D
,
FC
from
paddle.fluid.
dygraph
.base
import
to_variable
from
test_imperative_base
import
new_program_scope
class
SimpleImgConvPool
(
fluid
.
imperative
.
Layer
):
class
SimpleImgConvPool
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
name_scope
,
num_channels
,
...
...
@@ -77,7 +77,7 @@ class SimpleImgConvPool(fluid.imperative.Layer):
return
x
class
MNIST
(
fluid
.
imperative
.
Layer
):
class
MNIST
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
name_scope
):
super
(
MNIST
,
self
).
__init__
(
name_scope
)
...
...
@@ -104,11 +104,11 @@ class MNIST(fluid.imperative.Layer):
return
x
class
Test
Imperative
Mnist
(
unittest
.
TestCase
):
class
Test
Dygraph
Mnist
(
unittest
.
TestCase
):
def
test_mnist_float32
(
self
):
seed
=
90
epoch_num
=
1
with
fluid
.
imperative
.
guard
():
with
fluid
.
dygraph
.
guard
():
fluid
.
default_startup_program
().
random_seed
=
seed
fluid
.
default_main_program
().
random_seed
=
seed
...
...
python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
浏览文件 @
32146857
...
...
@@ -16,17 +16,17 @@ from __future__ import print_function
import
unittest
import
paddle.fluid
as
fluid
from
paddle.fluid.
imperative
.nn
import
Embedding
from
paddle.fluid.
dygraph
.nn
import
Embedding
import
paddle.fluid.framework
as
framework
from
paddle.fluid.optimizer
import
SGDOptimizer
from
paddle.fluid.
imperative
.base
import
to_variable
from
paddle.fluid.
dygraph
.base
import
to_variable
from
test_imperative_base
import
new_program_scope
import
numpy
as
np
import
six
from
paddle.fluid.backward
import
append_backward
class
SimpleLSTMRNN
(
fluid
.
imperative
.
Layer
):
class
SimpleLSTMRNN
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
name_scope
,
hidden_size
,
...
...
@@ -131,7 +131,7 @@ class SimpleLSTMRNN(fluid.imperative.Layer):
return
real_res
,
last_hidden
,
last_cell
class
PtbModel
(
fluid
.
imperative
.
Layer
):
class
PtbModel
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
name_scope
,
hidden_size
,
...
...
@@ -214,7 +214,7 @@ class PtbModel(fluid.imperative.Layer):
return
loss
,
last_hidden
,
last_cell
class
Test
Imperative
PtbRnn
(
unittest
.
TestCase
):
class
Test
Dygraph
PtbRnn
(
unittest
.
TestCase
):
def
test_ptb_rnn_cpu_float32
(
self
):
seed
=
90
hidden_size
=
10
...
...
@@ -224,7 +224,7 @@ class TestImperativePtbRnn(unittest.TestCase):
init_scale
=
0.1
batch_size
=
4
with
fluid
.
imperative
.
guard
():
with
fluid
.
dygraph
.
guard
():
fluid
.
default_startup_program
().
random_seed
=
seed
fluid
.
default_main_program
().
random_seed
=
seed
# TODO: marsyang1993 Change seed to
...
...
python/paddle/fluid/tests/unittests/test_imperative_resnet.py
浏览文件 @
32146857
...
...
@@ -21,8 +21,8 @@ import paddle
import
paddle.fluid
as
fluid
from
paddle.fluid
import
core
from
paddle.fluid.layer_helper
import
LayerHelper
from
paddle.fluid.
imperative
.nn
import
Conv2D
,
Pool2D
,
BatchNorm
,
FC
from
paddle.fluid.
imperative
.base
import
to_variable
from
paddle.fluid.
dygraph
.nn
import
Conv2D
,
Pool2D
,
BatchNorm
,
FC
from
paddle.fluid.
dygraph
.base
import
to_variable
from
test_imperative_base
import
new_program_scope
batch_size
=
8
...
...
@@ -57,7 +57,7 @@ def optimizer_setting(params):
lr
=
[]
lr
=
[
base_lr
*
(
0.1
**
i
)
for
i
in
range
(
len
(
bd
)
+
1
)]
optimizer
=
fluid
.
optimizer
.
SGD
(
learning_rate
=
0.01
)
# TODO(minqiyang): Add learning rate scheduler support to
imperative
mode
# TODO(minqiyang): Add learning rate scheduler support to
dygraph
mode
# optimizer = fluid.optimizer.Momentum(
# learning_rate=params["lr"],
# learning_rate=fluid.layers.piecewise_decay(
...
...
@@ -68,7 +68,7 @@ def optimizer_setting(params):
return
optimizer
class
ConvBNLayer
(
fluid
.
imperative
.
Layer
):
class
ConvBNLayer
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
name_scope
,
num_channels
,
...
...
@@ -99,7 +99,7 @@ class ConvBNLayer(fluid.imperative.Layer):
return
y
class
BottleneckBlock
(
fluid
.
imperative
.
Layer
):
class
BottleneckBlock
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
name_scope
,
num_channels
,
...
...
@@ -156,7 +156,7 @@ class BottleneckBlock(fluid.imperative.Layer):
return
layer_helper
.
append_activation
(
y
)
class
ResNet
(
fluid
.
imperative
.
Layer
):
class
ResNet
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
name_scope
,
layers
=
50
,
class_dim
=
102
):
super
(
ResNet
,
self
).
__init__
(
name_scope
)
...
...
@@ -226,13 +226,13 @@ class ResNet(fluid.imperative.Layer):
return
y
class
Test
Imperative
Resnet
(
unittest
.
TestCase
):
class
Test
Dygraph
Resnet
(
unittest
.
TestCase
):
def
test_resnet_float32
(
self
):
seed
=
90
batch_size
=
train_parameters
[
"batch_size"
]
batch_num
=
20
with
fluid
.
imperative
.
guard
():
with
fluid
.
dygraph
.
guard
():
fluid
.
default_startup_program
().
random_seed
=
seed
fluid
.
default_main_program
().
random_seed
=
seed
...
...
python/paddle/fluid/tests/unittests/test_imperative_transformer.py
浏览文件 @
32146857
...
...
@@ -16,7 +16,7 @@ from __future__ import print_function
import
unittest
import
paddle.fluid
as
fluid
from
paddle.fluid.
imperative
import
Embedding
,
LayerNorm
,
FC
,
to_variable
,
Layer
,
guard
from
paddle.fluid.
dygraph
import
Embedding
,
LayerNorm
,
FC
,
to_variable
,
Layer
,
guard
from
test_imperative_base
import
new_program_scope
from
paddle.fluid
import
core
import
numpy
as
np
...
...
@@ -623,7 +623,7 @@ class PrepareEncoderDecoderLayer(Layer):
initializer
=
fluid
.
initializer
.
NumpyArrayInitializer
(
pos_inp
),
trainable
=
False
))
# use in
imperative
_mode to fit different length batch
# use in
dygraph
_mode to fit different length batch
# self._pos_emb._w = to_variable(
# position_encoding_init(self._src_max_len, self._src_emb_dim))
...
...
@@ -946,7 +946,7 @@ class TransFormer(Layer):
return
sum_cost
,
avg_cost
,
predict
,
token_num
class
Test
Imperative
Transformer
(
unittest
.
TestCase
):
class
Test
Dygraph
Transformer
(
unittest
.
TestCase
):
def
test_transformer_float32
(
self
):
seed
=
90
with
guard
():
...
...
python/paddle/fluid/tests/unittests/test_layers.py
浏览文件 @
32146857
...
...
@@ -29,8 +29,8 @@ from paddle.fluid import core
from
paddle.fluid.initializer
import
Constant
import
paddle.fluid.layers
as
layers
from
test_imperative_base
import
new_program_scope
from
paddle.fluid.
imperative
import
nn
from
paddle.fluid.
imperative
import
base
from
paddle.fluid.
dygraph
import
nn
from
paddle.fluid.
dygraph
import
base
class
LayerTest
(
unittest
.
TestCase
):
...
...
@@ -68,7 +68,7 @@ class LayerTest(unittest.TestCase):
@
contextlib
.
contextmanager
def
dynamic_graph
(
self
,
force_to_use_cpu
=
False
):
with
fluid
.
imperative
.
guard
(
with
fluid
.
dygraph
.
guard
(
self
.
_get_place
(
force_to_use_cpu
=
force_to_use_cpu
)):
fluid
.
default_startup_program
().
random_seed
=
self
.
seed
fluid
.
default_main_program
().
random_seed
=
self
.
seed
...
...
python/paddle/fluid/tests/unittests/test_parallel_executor_crf.py
浏览文件 @
32146857
...
...
@@ -61,6 +61,11 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
param_attr
=
fluid
.
ParamAttr
(
name
=
embedding_name
,
trainable
=
False
))
for
x
in
word_input
]
# TODO(zcd): if the parameter is not trainable, the
# parameter's gradient should not generated.
for
emb_layer
in
emb_layers
:
emb_layer
.
stop_gradient
=
True
emb_layers
.
append
(
predicate_embedding
)
emb_layers
.
append
(
mark_embedding
)
...
...
@@ -113,6 +118,8 @@ class TestCRFModel(unittest.TestCase):
os
.
environ
[
'CPU_NUM'
]
=
str
(
4
)
main
=
fluid
.
Program
()
startup
=
fluid
.
Program
()
scope
=
fluid
.
Scope
()
with
fluid
.
scope_guard
(
scope
):
with
fluid
.
program_guard
(
main
,
startup
):
word
=
fluid
.
layers
.
data
(
name
=
'word_data'
,
shape
=
[
1
],
dtype
=
'int64'
,
lod_level
=
1
)
...
...
python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py
浏览文件 @
32146857
...
...
@@ -41,12 +41,13 @@ class TestBase(unittest.TestCase):
fluid
.
CUDAPlace
(
0
)
if
use_gpu
else
fluid
.
CPUPlace
())
exe
.
run
(
startup_prog
)
for
_
in
six
.
moves
.
xrange
(
iter
):
exe_strategy
=
fluid
.
ExecutionStrategy
()
exe_strategy
.
_dry_run
=
True
exe_strategy
.
use_experimental_executor
=
use_experimental_executor
train_cp
=
compiler
.
CompiledProgram
(
main_prog
).
with_data_parallel
(
train_cp
=
compiler
.
CompiledProgram
(
main_prog
).
with_data_parallel
(
loss_name
=
loss
.
name
,
exec_strategy
=
exe_strategy
)
for
_
in
six
.
moves
.
xrange
(
iter
):
for
_
in
six
.
moves
.
xrange
(
iter_per_pe
):
exe
.
run
(
train_cp
)
...
...
python/paddle/fluid/tests/unittests/test_variable.py
浏览文件 @
32146857
...
...
@@ -19,7 +19,6 @@ from paddle.fluid.framework import default_main_program, Program, convert_np_dty
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
import
numpy
as
np
from
test_imperative_base
import
new_program_scope
class
TestVariable
(
unittest
.
TestCase
):
...
...
@@ -153,7 +152,7 @@ class TestVariableImperative(unittest.TestCase):
self
.
assertEqual
([
1
,
1
,
100
],
nw
.
shape
)
def
test_slice
(
self
):
with
fluid
.
imperative
.
guard
():
with
fluid
.
dygraph
.
guard
():
self
.
_test_slice
()
...
...
python/setup.py.in
浏览文件 @
32146857
...
...
@@ -102,7 +102,7 @@ packages=['paddle',
'paddle.reader',
'paddle.distributed',
'paddle.fluid',
'paddle.fluid.
imperative
',
'paddle.fluid.
dygraph
',
'paddle.fluid.proto',
'paddle.fluid.proto.profiler',
'paddle.fluid.distributed',
...
...
tools/print_signatures.py
浏览文件 @
32146857
...
...
@@ -28,7 +28,7 @@ import hashlib
member_dict
=
collections
.
OrderedDict
()
experimental_namespace
=
{
"paddle.fluid.
imperative
"
}
experimental_namespace
=
{
"paddle.fluid.
dygraph
"
}
def
md5
(
doc
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录