PaddlePaddle / Paddle
Commit 8ed02339
Authored on Dec 27, 2018 by minqiyang

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into accelerate_ddpg

test=develop

Parents: 68b86d66, 9c6a0203

Showing 107 changed files with 2,800 additions and 211 deletions (+2800, -211)
paddle/contrib/float16/float16_transpiler.py  +1 -1
paddle/fluid/API.spec  +18 -5
paddle/fluid/framework/CMakeLists.txt  +3 -2
paddle/fluid/framework/attribute.h  +14 -13
paddle/fluid/framework/details/multi_devices_graph_pass.cc  +5 -3
paddle/fluid/framework/details/multi_devices_graph_pass.h  +2 -1
paddle/fluid/framework/details/scale_loss_grad_op_handle.cc  +44 -17
paddle/fluid/framework/details/scale_loss_grad_op_handle.h  +3 -2
paddle/fluid/framework/ngraph_bridge.cc  +3 -1
paddle/fluid/framework/op_desc.cc  +1 -1
paddle/fluid/framework/op_proto_maker.cc  +4 -0
paddle/fluid/framework/op_proto_maker.h  +1 -0
paddle/fluid/framework/op_registry.cc  +1 -1
paddle/fluid/framework/operator.cc  +56 -19
paddle/fluid/framework/scope_pool.cc  +54 -0
paddle/fluid/framework/scope_pool.h  +46 -0
paddle/fluid/framework/tensor.cc  +1 -2
paddle/fluid/framework/tensor.h  +1 -1
paddle/fluid/framework/tensor_util.h  +22 -0
paddle/fluid/inference/api/analysis_predictor.cc  +5 -2
paddle/fluid/inference/api/api_impl.cc  +5 -2
paddle/fluid/inference/tests/api/CMakeLists.txt  +9 -0
paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc  +178 -0
paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc  +117 -0
paddle/fluid/inference/tests/api/tester_helper.h  +27 -20
paddle/fluid/operators/conv_op.h  +3 -9
paddle/fluid/operators/dequantize_mkldnn_op.cc  +88 -0
paddle/fluid/operators/dequantize_op.cc  +45 -0
paddle/fluid/operators/dequantize_op.h  +54 -0
paddle/fluid/operators/detection/density_prior_box_op.cu  +3 -2
paddle/fluid/operators/elementwise/elementwise_div_op.cu  +5 -0
paddle/fluid/operators/elementwise/elementwise_mul_op.cu  +12 -10
paddle/fluid/operators/fill_zeros_like_op.cu.cc  +3 -0
paddle/fluid/operators/math/concat_and_split.cu  +3 -2
paddle/fluid/operators/math/selected_rows_functor.cc  +12 -5
paddle/fluid/operators/math/selected_rows_functor.cu  +6 -3
paddle/fluid/operators/math/selected_rows_functor.h  +6 -3
paddle/fluid/operators/metrics/accuracy_op.cu  +5 -3
paddle/fluid/operators/ngraph/ngraph_ops.h  +2 -0
paddle/fluid/operators/ngraph/ops/binary_unnary_op.h  +0 -1
paddle/fluid/operators/ngraph/ops/fill_constant_op.h  +61 -0
paddle/fluid/operators/ngraph/ops/top_k_op.h  +51 -0
paddle/fluid/operators/optimizers/adam_op.h  +138 -21
paddle/fluid/operators/optimizers/momentum_op.cu  +4 -1
paddle/fluid/operators/optimizers/momentum_op.h  +4 -2
paddle/fluid/operators/quantize_mkldnn_op.cc  +89 -0
paddle/fluid/operators/quantize_op.cc  +47 -0
paddle/fluid/operators/quantize_op.h  +46 -0
paddle/fluid/operators/sequence_ops/sequence_mask_op.h  +1 -1
paddle/fluid/operators/split_lod_tensor_op.cc  +1 -1
paddle/fluid/operators/top_k_op.cc  +14 -1
paddle/fluid/operators/top_k_op.cu  +20 -6
paddle/fluid/operators/top_k_op.h  +10 -2
paddle/fluid/platform/device_context.cc  +4 -3
paddle/fluid/platform/device_context.h  +22 -1
paddle/fluid/platform/mkldnn_reuse.h  +16 -0
paddle/fluid/platform/nccl_helper.h  +3 -0
paddle/fluid/platform/temporary_allocator.h  +13 -0
paddle/fluid/platform/temporary_allocator_test.cc  +9 -9
paddle/fluid/pybind/CMakeLists.txt  +1 -1
paddle/fluid/pybind/const_value.cc  +3 -0
paddle/fluid/pybind/pybind.cc  +15 -2
paddle/scripts/installation_validate.py  +18 -0
paddle/scripts/paddle_build.sh  +6 -0
python/paddle/fluid/__init__.py  +1 -1
python/paddle/fluid/contrib/__init__.py  +3 -0
python/paddle/fluid/contrib/slim/__init__.py  +25 -0
python/paddle/fluid/contrib/slim/core/__init__.py  +24 -0
python/paddle/fluid/contrib/slim/core/compress_pass.py  +129 -0
python/paddle/fluid/contrib/slim/core/config.py  +111 -0
python/paddle/fluid/contrib/slim/core/pass_builder.py  +39 -0
python/paddle/fluid/contrib/slim/core/strategy.py  +48 -0
python/paddle/fluid/contrib/slim/demo/filter_prune/config.yaml  +28 -0
python/paddle/fluid/contrib/slim/demo/filter_prune/demo.py  +69 -0
python/paddle/fluid/contrib/slim/graph/__init__.py  +23 -0
python/paddle/fluid/contrib/slim/graph/executor.py  +62 -0
python/paddle/fluid/contrib/slim/graph/graph.py  +45 -0
python/paddle/fluid/contrib/slim/graph/graph_pass.py  +42 -0
python/paddle/fluid/contrib/slim/prune/__init__.py  +21 -0
python/paddle/fluid/contrib/slim/prune/prune_strategy.py  +66 -0
python/paddle/fluid/contrib/slim/prune/pruner.py  +83 -0
python/paddle/fluid/contrib/slim/unitest/__init__.py  +13 -0
python/paddle/fluid/contrib/slim/unitest/configs/config.yaml  +29 -0
python/paddle/fluid/contrib/slim/unitest/configs/pruners.yaml  +12 -0
python/paddle/fluid/contrib/slim/unitest/configs/pruners_0.yaml  +12 -0
python/paddle/fluid/contrib/slim/unitest/test_factory.py  +41 -0
python/paddle/fluid/data_feeder.py  +2 -0
python/paddle/fluid/executor.py  +1 -1
python/paddle/fluid/framework.py  +5 -0
python/paddle/fluid/initializer.py  +50 -4
python/paddle/fluid/layers/nn.py  +16 -6
python/paddle/fluid/tests/unittests/ngraph/test_fill_constant_ngraph_op.py  +37 -0
python/paddle/fluid/tests/unittests/ngraph/test_top_k_ngraph_op.py  +41 -0
python/paddle/fluid/tests/unittests/op_test.py  +2 -0
python/paddle/fluid/tests/unittests/test_accuracy_op.py  +15 -2
python/paddle/fluid/tests/unittests/test_dequantize_mkldnn_op.py  +73 -0
python/paddle/fluid/tests/unittests/test_elementwise_div_op.py  +23 -2
python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py  +5 -0
python/paddle/fluid/tests/unittests/test_fill_zeros_like_op.py  +11 -1
python/paddle/fluid/tests/unittests/test_momentum_op.py  +17 -4
python/paddle/fluid/tests/unittests/test_operator_desc.py  +1 -1
python/paddle/fluid/tests/unittests/test_py_func_op.py  +3 -3
python/paddle/fluid/tests/unittests/test_quantize_mkldnn_op.py  +76 -0
python/paddle/fluid/tests/unittests/test_top_k_op.py  +25 -3
python/paddle/fluid/transpiler/inference_transpiler.py  +1 -1
python/requirements.txt  +2 -0
python/setup.py.in  +4 -0
paddle/contrib/float16/float16_transpiler.py
@@ -60,7 +60,7 @@ class Float16Transpiler:
             raise TypeError("place should be as CPUPlace/CUDAPlace type")
         if scope is None:
             scope = global_scope()
-        if not isinstance(scope, core.Scope):
+        if not isinstance(scope, core._Scope):
             raise TypeError("scope should be as Scope type or None")
         self.scope = scope
paddle/fluid/API.spec
@@ -351,6 +351,23 @@ paddle.fluid.contrib.QuantizeTranspiler.__init__ ArgSpec(args=['self', 'weight_b
 paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.contrib.QuantizeTranspiler.freeze_program ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None))
 paddle.fluid.contrib.QuantizeTranspiler.training_transpile ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
+paddle.fluid.contrib.build_compressor ArgSpec(args=['place', 'data_reader', 'data_feeder', 'scope', 'metrics', 'epoch', 'config'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None))
+paddle.fluid.contrib.CompressPass.__init__ ArgSpec(args=['self', 'place', 'data_reader', 'data_feeder', 'scope', 'metrics', 'epoch', 'program_exe'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None))
+paddle.fluid.contrib.CompressPass.add_strategy ArgSpec(args=['self', 'strategy'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.contrib.CompressPass.apply ArgSpec(args=['self', 'graph'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.contrib.ImitationGraph.__init__ ArgSpec(args=['self', 'program'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.contrib.ImitationGraph.all_parameters ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.contrib.SensitivePruneStrategy.__init__ ArgSpec(args=['self', 'pruner', 'start_epoch', 'end_epoch', 'delta_rate', 'acc_loss_threshold', 'sensitivities'], varargs=None, keywords=None, defaults=(None, 0, 10, 0.2, 0.2, None))
+paddle.fluid.contrib.SensitivePruneStrategy.on_batch_begin ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.contrib.SensitivePruneStrategy.on_batch_end ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.contrib.SensitivePruneStrategy.on_compress_begin ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.contrib.SensitivePruneStrategy.on_compress_end ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.contrib.SensitivePruneStrategy.on_epoch_begin ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.contrib.SensitivePruneStrategy.on_epoch_end ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.contrib.MagnitudePruner.__init__ ArgSpec(args=['self', 'threshold'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.contrib.MagnitudePruner.prune ArgSpec(args=['self', 'param', 'threshold'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.contrib.RatioPruner.__init__ ArgSpec(args=['self', 'ratios'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.contrib.RatioPruner.prune ArgSpec(args=['self', 'param', 'ratio'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.contrib.load_persistables_for_increment ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var', 'lookup_table_var_path'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.contrib.load_persistables_for_inference ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var_name'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.contrib.convert_dist_to_sparse_program ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None)
@@ -447,11 +464,7 @@ paddle.fluid.unique_name.switch ArgSpec(args=['new_generator'], varargs=None, ke
 paddle.fluid.unique_name.guard ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
 paddle.fluid.recordio_writer.convert_reader_to_recordio_file ArgSpec(args=['filename', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None))
 paddle.fluid.recordio_writer.convert_reader_to_recordio_files ArgSpec(args=['filename', 'batch_per_file', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None))
-paddle.fluid.Scope.__init__ __init__(self: paddle.fluid.core.Scope) -> None
-paddle.fluid.Scope.drop_kids drop_kids(self: paddle.fluid.core.Scope) -> None
-paddle.fluid.Scope.find_var find_var(self: paddle.fluid.core.Scope, arg0: unicode) -> paddle.fluid.core.Variable
-paddle.fluid.Scope.new_scope new_scope(self: paddle.fluid.core.Scope) -> paddle.fluid.core.Scope
-paddle.fluid.Scope.var var(self: paddle.fluid.core.Scope, arg0: unicode) -> paddle.fluid.core.Variable
+paddle.fluid.Scope Scope() -> paddle.fluid.core._Scope
 paddle.reader.map_readers ArgSpec(args=['func'], varargs='readers', keywords=None, defaults=None)
 paddle.reader.buffered ArgSpec(args=['reader', 'size'], varargs=None, keywords=None, defaults=None)
 paddle.reader.compose ArgSpec(args=[], varargs='readers', keywords='kwargs', defaults=None)
paddle/fluid/framework/CMakeLists.txt
@@ -48,10 +48,10 @@ if(WITH_GPU)
 nv_library(tensor SRCS tensor.cc .tensor_util.cu DEPS place memory data_type device_context)
 add_dependencies(tensor tensor_util)
 else()
 nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context)
 endif(WIN32)
 else()
 cc_library(tensor SRCS tensor.cc tensor_util.cc DEPS place memory data_type device_context)
 endif()
 cc_test(tensor_test SRCS tensor_test.cc DEPS tensor)
@@ -84,6 +84,7 @@ cc_library(threadpool SRCS threadpool.cc DEPS enforce)
 cc_test(threadpool_test SRCS threadpool_test.cc DEPS threadpool)
 cc_library(scope SRCS scope.cc DEPS glog threadpool xxhash)
+cc_library(scope_pool SRCS scope_pool.cc DEPS scope)
 cc_test(scope_test SRCS scope_test.cc DEPS scope)
 cc_library(data_device_transform SRCS data_device_transform.cc DEPS tensor)
paddle/fluid/framework/attribute.h
@@ -165,7 +165,7 @@ template <typename T>
 class GreaterThanChecker {
  public:
   explicit GreaterThanChecker(T lower_bound) : lower_bound_(lower_bound) {}
-  void operator()(T& value) const {
+  void operator()(const T& value) const {
     PADDLE_ENFORCE(value > lower_bound_, "larger_than check fails.");
   }
@@ -177,7 +177,7 @@ template <typename T>
 class EqualGreaterThanChecker {
  public:
   explicit EqualGreaterThanChecker(T lower_bound) : lower_bound_(lower_bound) {}
-  void operator()(T& value) const {
+  void operator()(const T& value) const {
     PADDLE_ENFORCE_GE(value, lower_bound_, "equal_larger_than check fails.");
   }
@@ -193,7 +193,7 @@ class DefaultValueSetter {
  public:
   explicit DefaultValueSetter(T default_value)
       : default_value_(default_value) {}
-  void operator()(T& value) const { value = default_value_; }  // NOLINT
+  void operator()(T* value) const { *value = default_value_; }

  private:
   T default_value_;
@@ -203,7 +203,7 @@ template <typename T>
 class EnumInContainer {
  public:
   explicit EnumInContainer(const std::unordered_set<T>& c) : container_(c) {}
-  void operator()(T& val) const {
+  void operator()(const T& val) const {
     PADDLE_ENFORCE(container_.find(val) != container_.end(),
                    "Value %s is not in enum container %s", val,
                    ContainerDebugString());
@@ -232,7 +232,8 @@ class EnumInContainer {
 // an attribute can have more than one limits
 template <typename T>
 class TypedAttrChecker {
-  typedef std::function<void(T&)> ValueChecker;
+  typedef std::function<void(T*)> DefaultValueChecker;
+  typedef std::function<void(const T&)> ValueChecker;

  public:
   explicit TypedAttrChecker(const std::string& attr_name)
@@ -268,17 +269,17 @@ class TypedAttrChecker {
     return *this;
   }

-  void operator()(AttributeMap& attr_map) const {  // NOLINT
-    if (!attr_map.count(attr_name_)) {
+  void operator()(AttributeMap* attr_map) const {
+    if (!attr_map->count(attr_name_)) {
       // user do not set this attr
       PADDLE_ENFORCE(!default_value_setter_.empty(),
                      "Attribute '%s' is required!", attr_name_);
       // default_value_setter_ has no more than one element
       T val;
-      (default_value_setter_[0])(val);
-      attr_map[attr_name_] = val;
+      (default_value_setter_[0])(&val);
+      (*attr_map)[attr_name_] = val;
     }
-    Attribute& attr = attr_map.at(attr_name_);
+    Attribute& attr = attr_map->at(attr_name_);
     ExtractAttribute<T> extract_attr(attr_name_);
     T* attr_value = extract_attr(attr);
     for (const auto& checker : value_checkers_) {
@@ -289,12 +290,12 @@ class TypedAttrChecker {
  private:
   std::string attr_name_;
   std::vector<ValueChecker> value_checkers_;
-  std::vector<ValueChecker> default_value_setter_;
+  std::vector<DefaultValueChecker> default_value_setter_;
 };

 // check whether op's all attributes fit their own limits
 class OpAttrChecker {
-  typedef std::function<void(AttributeMap&)> AttrChecker;
+  typedef std::function<void(AttributeMap*)> AttrChecker;

  public:
   template <typename T>
@@ -304,7 +305,7 @@ class OpAttrChecker {
     return *(checker.target<TypedAttrChecker<T>>());
   }

-  void Check(AttributeMap& attr_map) const {  // NOLINT
+  void Check(AttributeMap* attr_map) const {
     for (const auto& checker : attr_checkers_) {
       checker(attr_map);
     }
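The attribute.h hunks above replace every non-const reference parameter in the checker functors with either a const reference (read-only validation) or a pointer (mutation), so a call site now makes mutation explicit; the matching call-site updates appear below in op_desc.cc and op_registry.cc, which pass &attrs instead of attrs. The standalone sketch that follows is not part of the diff and assumes nothing about Paddle's headers: ToyAttrChecker, AttrMap, and the "groups" attribute are invented names used only to illustrate the same calling convention.

#include <functional>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>

// A toy attribute map keyed by name, standing in for Paddle's AttributeMap.
using AttrMap = std::map<std::string, int>;

// Checkers read the map through a pointer, making mutation explicit at the
// call site, mirroring the TypedAttrChecker/OpAttrChecker change above.
class ToyAttrChecker {
 public:
  // Read-only validation takes a const reference.
  using ValueChecker = std::function<void(const int&)>;
  // Default-value setters write through a pointer.
  using DefaultSetter = std::function<void(int*)>;

  ToyAttrChecker(std::string name, ValueChecker check, DefaultSetter set_default)
      : name_(std::move(name)),
        check_(std::move(check)),
        set_default_(std::move(set_default)) {}

  void Check(AttrMap* attrs) const {
    if (attrs->count(name_) == 0) {
      int value = 0;
      set_default_(&value);      // write the default through the pointer
      (*attrs)[name_] = value;
    }
    check_(attrs->at(name_));    // validate through a const reference
  }

 private:
  std::string name_;
  ValueChecker check_;
  DefaultSetter set_default_;
};

int main() {
  ToyAttrChecker checker(
      "groups",
      [](const int& v) {
        if (v <= 0) throw std::runtime_error("larger_than check fails.");
      },
      [](int* v) { *v = 1; });

  AttrMap attrs;                 // "groups" not set: the default is filled in
  checker.Check(&attrs);         // callers pass a pointer, as in Check(&attrs_)
  std::cout << attrs["groups"] << "\n";  // prints 1
  return 0;
}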
paddle/fluid/framework/details/multi_devices_graph_pass.cc
@@ -355,7 +355,9 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
               BuildStrategy::GradientScaleStrategy::kCustomized) {
       // TODO(paddle-dev): Why is there no input for this op_handle?
       auto loss_grad_name = node->Op()->OutputArgumentNames()[0];
-      CreateScaleLossGradOp(&result, loss_grad_name, node->outputs[0]);
+      auto out_dtype = all_vars_.at(loss_grad_name)->GetDataType();
+      CreateScaleLossGradOp(&result, loss_grad_name, node->outputs[0],
+                            out_dtype);
     }
     // This assumes the backward generating code will ensure IsScaleLossOp
     // is true only for the op that scale the final scalar loss.
@@ -658,13 +660,13 @@ int MultiDevSSAGraphBuilder::GetVarDeviceID(
 void MultiDevSSAGraphBuilder::CreateScaleLossGradOp(
     ir::Graph *result, const std::string &loss_grad_name,
-    ir::Node *out_var_node) const {
+    ir::Node *out_var_node, proto::VarType::Type dtype) const {
   for (size_t i = 0; i < places_.size(); ++i) {
     // Insert ScaleCost OpHandle
     auto *dev_ctx = platform::DeviceContextPool::Instance().Get(places_[i]);
     auto *op_handle = new ScaleLossGradOpHandle(
         result->CreateEmptyNode("scale_loss_grad", ir::Node::Type::kOperation),
-        local_scopes_.size(), local_scopes_[i], places_[i], dev_ctx);
+        local_scopes_.size(), local_scopes_[i], places_[i], dev_ctx, dtype);
     result->Get<GraphOps>(kGraphOps).emplace_back(op_handle);

     // FIXME: Currently ScaleLossGradOp only use device_count as scale
paddle/fluid/framework/details/multi_devices_graph_pass.h
@@ -68,7 +68,8 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
   void CreateScaleLossGradOp(ir::Graph* result,
                              const std::string& loss_grad_name,
-                             ir::Node* out_var_node) const;
+                             ir::Node* out_var_node,
+                             proto::VarType::Type dtype) const;

   VarHandle* CreateReduceOp(ir::Graph* result, const std::string& og,
                             int dst_dev_id) const;
paddle/fluid/framework/details/scale_loss_grad_op_handle.cc
@@ -22,39 +22,66 @@ namespace details {
 ScaleLossGradOpHandle::ScaleLossGradOpHandle(ir::Node *node, size_t num_dev,
                                              Scope *scope,
                                              platform::Place place,
-                                             platform::DeviceContext *dev_ctx)
+                                             platform::DeviceContext *dev_ctx,
+                                             proto::VarType::Type dtype)
     : OpHandleBase(node),
       coeff_(static_cast<float>(1.0 / num_dev)),
       scope_(scope),
-      place_(place) {
+      place_(place),
+      out_dtype_(dtype) {
   this->SetDeviceContext(place_, dev_ctx);
 }

 ScaleLossGradOpHandle::~ScaleLossGradOpHandle() {}

+struct ScaleLossGradFunctor {
+  float coeff_;
+  Tensor *out_;
+  platform::Place place_;
+  OpHandleBase *op_handle_;
+  proto::VarType::Type out_dtype_;
+  platform::DeviceContext *ctx_;
+
+  ScaleLossGradFunctor(float coeff, Tensor *out, platform::Place place,
+                       OpHandleBase *op_handle, proto::VarType::Type dtype,
+                       platform::DeviceContext *ctx)
+      : coeff_(coeff), out_(out), place_(place), out_dtype_(dtype), ctx_(ctx) {}
+
+  template <typename OutT>
+  void apply() const {
+    auto *out_data = out_->mutable_data<OutT>(place_);
+    if (platform::is_cpu_place(place_)) {
+      *out_data = static_cast<OutT>(coeff_);
+    } else {
+#ifdef PADDLE_WITH_CUDA
+      OutT cast_coeff = static_cast<OutT>(coeff_);
+      auto stream = static_cast<platform::CUDADeviceContext *>(ctx_)->stream();
+      memory::Copy(boost::get<platform::CUDAPlace>(place_), out_data,
+                   platform::CPUPlace(), &cast_coeff, SizeOfType(out_dtype_),
+                   stream);
+      VLOG(10) << place_ << "RUN Scale loss grad op";
+#endif
+    }
+  }
+};
+
 void ScaleLossGradOpHandle::RunImpl() {
   // Doesn't wait any event
   std::string var_name = static_cast<VarHandle *>(this->outputs_[0])->name_;
   auto &local_scope = *scope_->FindVar(kLocalExecScopeName)->Get<Scope *>();

-  float *tmp = local_scope.FindVar(var_name)
-                   ->GetMutable<LoDTensor>()
-                   ->mutable_data<float>(make_ddim({1}), place_);
+  auto *tensor = local_scope.FindVar(var_name)->GetMutable<LoDTensor>();
+  tensor->Resize(make_ddim({1}));

-  if (platform::is_cpu_place(place_)) {
-    *tmp = coeff_;
-  } else {
 #ifdef PADDLE_WITH_CUDA
-    this->RunAndRecordEvent([&] {
-      auto stream = static_cast<platform::CUDADeviceContext *>(
-                        this->dev_ctxes_.at(place_))
-                        ->stream();
-      memory::Copy(boost::get<platform::CUDAPlace>(place_), tmp,
-                   platform::CPUPlace(), &coeff_, sizeof(float), stream);
-      VLOG(10) << place_ << "RUN Scale loss grad op";
-    });
+  ScaleLossGradFunctor func(coeff_, tensor, place_, this, out_dtype_,
+                            this->dev_ctxes_.at(place_));
+  this->RunAndRecordEvent([&] { framework::VisitDataType(out_dtype_, func); });
+#else
+  ScaleLossGradFunctor func(coeff_, tensor, place_, this, out_dtype_, nullptr);
+  framework::VisitDataType(out_dtype_, func);
 #endif
-  }
 }

 std::string ScaleLossGradOpHandle::Name() const { return "Scale LossGrad"; }
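The rewritten RunImpl above stops hard-coding float and instead dispatches on the loss-grad output dtype: a functor with a templated apply<OutT>() is handed to framework::VisitDataType, which selects the concrete instantiation at runtime. The sketch below only illustrates that dispatch shape with a hand-rolled visitor; DType, VisitDataType, and ScaleFunctor here are made-up stand-ins, not Paddle code.

#include <cstdint>
#include <iostream>
#include <stdexcept>

// A tiny dtype tag, standing in for proto::VarType::Type.
enum class DType { FP32, FP64, INT64 };

// Calls visitor.template apply<T>() with T chosen by the runtime dtype,
// mirroring how framework::VisitDataType drives ScaleLossGradFunctor.
template <typename Visitor>
void VisitDataType(DType dtype, Visitor visitor) {
  switch (dtype) {
    case DType::FP32:  visitor.template apply<float>(); break;
    case DType::FP64:  visitor.template apply<double>(); break;
    case DType::INT64: visitor.template apply<int64_t>(); break;
    default: throw std::runtime_error("unsupported dtype");
  }
}

// The functor captures untyped state; apply<OutT>() does the typed work
// (here: writing a scale coefficient cast to the output type).
struct ScaleFunctor {
  float coeff;
  void* out;  // type-erased output buffer

  template <typename OutT>
  void apply() const {
    *static_cast<OutT*>(out) = static_cast<OutT>(coeff);
  }
};

int main() {
  double buffer = 0.0;
  ScaleFunctor func{0.25f, &buffer};
  VisitDataType(DType::FP64, func);  // runs apply<double>()
  std::cout << buffer << "\n";       // prints 0.25
  return 0;
}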
paddle/fluid/framework/details/scale_loss_grad_op_handle.h
@@ -26,8 +26,8 @@ namespace details {
 struct ScaleLossGradOpHandle : public OpHandleBase {
   ScaleLossGradOpHandle(ir::Node *node, size_t num_dev, Scope *scope,
-                        platform::Place place,
-                        platform::DeviceContext *context);
+                        platform::Place place, platform::DeviceContext *context,
+                        proto::VarType::Type dtype);

   ~ScaleLossGradOpHandle() final;
@@ -40,6 +40,7 @@ struct ScaleLossGradOpHandle : public OpHandleBase {
   float coeff_;
   Scope *scope_;
   platform::Place place_;
+  proto::VarType::Type out_dtype_;
 };

 }  // namespace details
paddle/fluid/framework/ngraph_bridge.cc
@@ -31,10 +31,12 @@ std::map<std::string,
                   std::shared_ptr<std::unordered_map<
                       std::string, std::shared_ptr<ngraph::Node>>>)>>
     NgraphBridge::NG_NODE_MAP = {
+        {"fill_constant", paddle::operators::ngraphs::BuildFillConstantNode},
         {"mul", paddle::operators::ngraphs::BuildMulNode},
         {"mul_grad", paddle::operators::ngraphs::BuildMulGradNode},
         {"relu", paddle::operators::ngraphs::BuildUnaryNode<ngraph::op::Relu>},
-        {"tanh", paddle::operators::ngraphs::BuildUnaryNode<ngraph::op::Tanh>}};
+        {"tanh", paddle::operators::ngraphs::BuildUnaryNode<ngraph::op::Tanh>},
+        {"top_k", paddle::operators::ngraphs::BuildTopKNode}};

 void NgraphBridge::BuildNgNode(const std::shared_ptr<OperatorBase>& op) {
   auto& op_type = op->Type();
paddle/fluid/framework/op_desc.cc
@@ -643,7 +643,7 @@ void OpDesc::CheckAttrs() {
     // not by users.
     return;
   }
-  checker->Check(attrs_);
+  checker->Check(&attrs_);
 }

 void OpDesc::InferShape(const BlockDesc &block) const {
paddle/fluid/framework/op_proto_maker.cc
@@ -82,6 +82,10 @@ void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto,
   AddAttr<std::string>(OpNamescopeAttrName(), "Operator name with namesope.")
       .SetDefault("");
+  AddAttr<std::vector<std::string>>(OpCreationCallstackAttrName(),
+                                    "Callstack for Op Creatation.")
+      .SetDefault({});

   Validate();
 }
paddle/fluid/framework/op_proto_maker.h
@@ -47,6 +47,7 @@ class OpProtoAndCheckerMaker {
   static const char* OpRoleAttrName() { return "op_role"; }
   static const char* OpRoleVarAttrName() { return "op_role_var"; }
   static const char* OpNamescopeAttrName() { return "op_namescope"; }
+  static const char* OpCreationCallstackAttrName() { return "op_callstack"; }

   void operator()(proto::OpProto* proto, OpAttrChecker* attr_checker);
paddle/fluid/framework/op_registry.cc
@@ -24,7 +24,7 @@ std::unique_ptr<OperatorBase> OpRegistry::CreateOp(
     const VariableNameMap& outputs, AttributeMap attrs) {
   auto& info = OpInfoMap::Instance().Get(type);
   if (info.Checker() != nullptr) {
-    info.Checker()->Check(attrs);
+    info.Checker()->Check(&attrs);
   }
   auto op = info.Creator()(type, inputs, outputs, attrs);
   return std::unique_ptr<OperatorBase>(op);
paddle/fluid/framework/operator.cc
@@ -16,10 +16,15 @@ limitations under the License. */
 #include <glog/logging.h>
 #include <algorithm>
+#include <sstream>
+#include <string>
+#include <vector>
+#include "gflags/gflags.h"
+#include "glog/logging.h"
 #include "paddle/fluid/framework/data_transform.h"
 #include "paddle/fluid/framework/executor.h"
 #include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/op_proto_maker.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/shape_inference.h"
 #include "paddle/fluid/framework/transfer_scope_cache.h"
@@ -157,27 +162,59 @@ RuntimeContext::RuntimeContext(const VariableNameMap& innames,
 }

 void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
-  VLOG(4) << place << " " << DebugStringEx(&scope);
-  if (platform::is_gpu_place(place)) {
-#ifndef PADDLE_WITH_CUDA
-    PADDLE_THROW("Cannot run operator on place %s", place);
-#else
-    auto dev_id = boost::get<platform::CUDAPlace>(place).device;
-    platform::SetDeviceId(dev_id);
-#endif
-  }
-
-  // The profile has a process-wide mutex, results in serious performance issue
-  // in concurrency scenerio. Here use an `if` to fix this issue.
-  // Please not remove the `if`, ask @Superjomn if there are any concern.
-  if (platform::IsProfileEnabled()) {
-    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
-    platform::RecordEvent record_event(Type(), pool.Get(place));
-    RunImpl(scope, place);
-  } else {
-    RunImpl(scope, place);
-  }
-  VLOG(3) << place << " " << DebugStringEx(&scope);
+  try {
+    if (VLOG_IS_ON(4)) {
+      VLOG(4) << place << " " << DebugStringEx(&scope);
+    }
+    if (platform::is_gpu_place(place)) {
+#ifndef PADDLE_WITH_CUDA
+      PADDLE_THROW("Cannot run operator on place %s", place);
+#else
+      auto dev_id = boost::get<platform::CUDAPlace>(place).device;
+      platform::SetDeviceId(dev_id);
+#endif
+    }
+
+    // The profile has a process-wide mutex, results in serious performance
+    // issue
+    // in concurrency scenerio. Here use an `if` to fix this issue.
+    // Please not remove the `if`, ask @Superjomn if there are any concern.
+    if (platform::IsProfileEnabled()) {
+      platform::DeviceContextPool& pool =
+          platform::DeviceContextPool::Instance();
+      platform::RecordEvent record_event(Type(), pool.Get(place));
+      RunImpl(scope, place);
+    } else {
+      RunImpl(scope, place);
+    }
+
+    if (VLOG_IS_ON(3)) {
+      VLOG(3) << place << " " << DebugStringEx(&scope);
+    }
+  } catch (platform::EnforceNotMet exception) {
+    if (Attrs().count("sub_block") != 0) {
+      throw exception;
+    }
+
+    auto& callstack = Attr<std::vector<std::string>>(
+        OpProtoAndCheckerMaker::OpCreationCallstackAttrName());
+
+    if (callstack.empty()) {
+      throw exception;
+    }
+    std::ostringstream sout;
+    sout << "Invoke operator " << Type() << " error.\n";
+    sout << "Python Callstacks: \n";
+    for (auto& line : callstack) {
+      sout << line;
+    }
+    sout << "C++ Callstacks: \n";
+    sout << exception.err_str_;
+    exception.err_str_ = sout.str();
+    throw exception;
+  } catch (...) {
+    std::rethrow_exception(std::current_exception());
+  }
 }

 bool OperatorBase::HasInputs(const std::string& name) const {
@@ -1057,8 +1094,8 @@ proto::VarType::Type OperatorWithKernel::IndicateDataType(
         t = &(var->Get<SelectedRows>().value());
       }
       if (t != nullptr) {
-        PADDLE_ENFORCE(t->IsInitialized(), "Input %s is not initialized: %s",
-                       ipt_name, DebugString());
+        PADDLE_ENFORCE(t->IsInitialized(), "Input %s is not initialized",
+                       ipt_name);
         int tmp = static_cast<int>(t->type());
         PADDLE_ENFORCE(
             tmp == data_type || data_type == -1,
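The new try/catch block in OperatorBase::Run above catches an enforce failure, prepends the Python creation callstack stored in the op_callstack attribute, and rethrows the enriched error. The standalone sketch below shows only that catch-augment-rethrow shape using plain std::runtime_error; RunWithCallstack and the sample frames are invented names, whereas the real code works with platform::EnforceNotMet and its err_str_ field.

#include <functional>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

// Augments an error with the creation callstack before rethrowing,
// mirroring the shape of the operator.cc change (not the actual API).
void RunWithCallstack(const std::vector<std::string>& callstack,
                      const std::function<void()>& body) {
  try {
    body();
  } catch (const std::runtime_error& e) {
    if (callstack.empty()) throw;          // nothing to add, rethrow as-is
    std::ostringstream sout;
    sout << "Invoke operator error.\n";
    sout << "Python Callstacks:\n";
    for (const auto& line : callstack) sout << line << "\n";
    sout << "C++ error:\n" << e.what();
    throw std::runtime_error(sout.str());  // rethrow with the enriched message
  }
}

int main() {
  try {
    RunWithCallstack({"model.py:42", "layers.py:1023"},
                     [] { throw std::runtime_error("shape mismatch"); });
  } catch (const std::runtime_error& e) {
    std::cout << e.what() << "\n";
  }
  return 0;
}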
paddle/fluid/framework/scope_pool.cc (new file, 0 → 100644)
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/scope_pool.h"
#include "paddle/fluid/framework/threadpool.h"

namespace paddle {
namespace framework {

ScopePool &ScopePool::Instance() {  // NOLINT
  static ScopePool pool;
  return pool;
}

void ScopePool::DeleteScope(Scope *scope) { delete scope; }

void ScopePool::Insert(std::unique_ptr<Scope> &&s) {
  std::lock_guard<std::mutex> guard(mtx_);
  scopes_.insert(s.release());
}

void ScopePool::Remove(Scope *s) {
  size_t has_scope;
  {
    std::lock_guard<std::mutex> guard(mtx_);
    has_scope = scopes_.erase(s);
  }
  PADDLE_ENFORCE(has_scope > 0, "Delete non-existing global scope");
  DeleteScope(s);
}

ScopePool::~ScopePool() { Clear(); }

void ScopePool::Clear() {
  std::lock_guard<std::mutex> guard(mtx_);
  for (auto *s : scopes_) {
    DeleteScope(s);
  }
  scopes_.clear();
}

}  // namespace framework
}  // namespace paddle
paddle/fluid/platform/create_tensor_with_allocationptr.h → paddle/fluid/framework/scope_pool.h (renamed)
@@ -13,30 +13,34 @@
 // limitations under the License.

 #pragma once
-#include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/platform/temporary_allocator.h"
+#include <mutex>  // NOLINT
+#include <unordered_set>
+#include "paddle/fluid/framework/scope.h"

 namespace paddle {
-namespace platform {
-
-template <typename T>
-paddle::framework::Tensor GetTensor(
-    memory::allocation::AllocationPtr temp_allocation_ptr,
-    const framework::DDim& dim) {
-  auto& deleter = temp_allocation_ptr.get_deleter();
-  auto* allocation_ptr = temp_allocation_ptr.release();
-  auto shared_allocation =
-      std::shared_ptr<memory::allocation::Allocation>(allocation_ptr, deleter);
-
-  PADDLE_ENFORCE(dynamic_cast<TemporaryAllocation*>(allocation_ptr) != nullptr,
-                 "The AllocationPtr must be TemporaryAllocation.");
-  PADDLE_ENFORCE_EQ(allocation_ptr->size(),
-                    framework::product(dim) * sizeof(T));
-
-  paddle::framework::Tensor temp_tensor(std::type_index(typeid(T)));
-  temp_tensor.Resize(dim);
-  temp_tensor.ResetHolder(std::move(shared_allocation));
-  return temp_tensor;
-}
-
-}  // namespace platform
+namespace framework {
+
+class ScopePool {
+ public:
+  static ScopePool& Instance();  // NOLINT
+
+  void Insert(std::unique_ptr<Scope>&& s);
+
+  void Remove(Scope* s);
+
+  void Clear();
+
+  ~ScopePool();
+
+ private:
+  ScopePool() = default;
+
+  static void DeleteScope(Scope* scope);
+
+  std::unordered_set<Scope*> scopes_;
+  std::mutex mtx_;
+};
+
+}  // namespace framework
 }  // namespace paddle
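scope_pool.h/.cc above introduce a process-wide registry of scopes: a lazily constructed singleton guarding an unordered_set of raw pointers with a mutex, taking ownership via Insert(std::unique_ptr&&) and deleting everything in Clear(). The sketch below reproduces only that ownership pattern generically; ResourcePool and Resource are placeholder names, not Paddle types, and the example assumes nothing beyond the standard library.

#include <iostream>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_set>

struct Resource {  // stands in for framework::Scope
  std::string name;
};

// A minimal pool in the style of ScopePool: singleton, mutex-guarded set,
// ownership transferred in via unique_ptr::release().
class ResourcePool {
 public:
  static ResourcePool& Instance() {
    static ResourcePool pool;  // constructed on first use
    return pool;
  }

  void Insert(std::unique_ptr<Resource>&& r) {
    std::lock_guard<std::mutex> guard(mtx_);
    items_.insert(r.release());  // the pool now owns the raw pointer
  }

  void Remove(Resource* r) {
    {
      std::lock_guard<std::mutex> guard(mtx_);
      items_.erase(r);
    }
    delete r;
  }

  void Clear() {
    std::lock_guard<std::mutex> guard(mtx_);
    for (auto* r : items_) delete r;
    items_.clear();
  }

  ~ResourcePool() { Clear(); }

 private:
  ResourcePool() = default;
  std::unordered_set<Resource*> items_;
  std::mutex mtx_;
};

int main() {
  ResourcePool::Instance().Insert(std::make_unique<Resource>(Resource{"global"}));
  std::cout << "pooled resources are freed in Clear()/destructor\n";
  ResourcePool::Instance().Clear();
  return 0;
}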
paddle/fluid/framework/tensor.cc
@@ -28,8 +28,7 @@ void Tensor::check_memory_size() const {
       "or maybe the required data-type mismatches the data already stored.");
 }

-Tensor::Tensor(std::type_index type)
-    : type_(framework::ToDataType(type)), offset_(0) {}
+Tensor::Tensor(const proto::VarType::Type& dtype) : type_(dtype), offset_(0) {}

 size_t Tensor::memory_size() const {
   return holder_ == nullptr ? 0UL : holder_->size() - offset_;
paddle/fluid/framework/tensor.h
@@ -69,7 +69,7 @@ class Tensor {
  public:
   Tensor() : type_(proto::VarType::FP32), offset_(0) {}

-  explicit Tensor(std::type_index type);
+  explicit Tensor(const proto::VarType::Type&);

   /*! Return a pointer to mutable memory block. */
   template <typename T>
paddle/fluid/framework/tensor_util.h
@@ -19,6 +19,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/framework.pb.h"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/platform/device_context.h"
+#include "paddle/fluid/platform/temporary_allocator.h"

 namespace paddle {
 namespace framework {
@@ -151,5 +152,26 @@ void TensorToVector(const Tensor& src, std::vector<T>* dst) {
            src_ptr, size);
 }

+template <typename T>
+paddle::framework::Tensor GetTensor(
+    memory::allocation::AllocationPtr temp_allocation_ptr,
+    const framework::DDim& dim) {
+  auto& deleter = temp_allocation_ptr.get_deleter();
+  auto* allocation_ptr = temp_allocation_ptr.release();
+  auto shared_allocation =
+      std::shared_ptr<memory::allocation::Allocation>(allocation_ptr, deleter);
+
+  PADDLE_ENFORCE(
+      dynamic_cast<platform::TemporaryAllocation*>(allocation_ptr) != nullptr,
+      "The AllocationPtr must be TemporaryAllocation.");
+  PADDLE_ENFORCE_EQ(allocation_ptr->size(),
+                    framework::product(dim) * sizeof(T));
+  paddle::framework::Tensor temp_tensor(
+      framework::ToDataType(std::type_index(typeid(T))));
+  temp_tensor.Resize(dim);
+  temp_tensor.ResetHolder(std::move(shared_allocation));
+  return temp_tensor;
+}
+
 }  // namespace framework
 }  // namespace paddle
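The GetTensor helper moved into tensor_util.h above re-wraps a uniquely owned temporary allocation as a shared holder without copying the bytes: it keeps the custom deleter, releases the raw pointer, and rebuilds a shared_ptr around both before handing it to the tensor. The snippet below isolates just that ownership hand-off using standard-library types only; Buffer, release_to_pool, and the 1024-byte size are illustrative assumptions, not Paddle code.

#include <cstddef>
#include <iostream>
#include <memory>

// Stand-in for a pooled allocation that must go back to its allocator,
// roughly playing the role of TemporaryAllocation.
struct Buffer {
  size_t size;
};

int main() {
  // A unique_ptr with a custom deleter models AllocationPtr.
  auto release_to_pool = [](Buffer* b) {
    std::cout << "returning " << b->size << " bytes to the pool\n";
    delete b;
  };
  std::unique_ptr<Buffer, decltype(release_to_pool)> temp(new Buffer{1024},
                                                          release_to_pool);

  // Same moves as GetTensor: keep the deleter, release ownership, then
  // rebuild shared ownership around the raw pointer + original deleter.
  auto deleter = temp.get_deleter();
  Buffer* raw = temp.release();
  std::shared_ptr<Buffer> shared(raw, deleter);

  std::cout << "shared buffer of " << shared->size << " bytes\n";
  return 0;  // the pool deleter still runs when the last shared_ptr dies
}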
paddle/fluid/inference/api/analysis_predictor.cc
@@ -231,11 +231,14 @@ bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
                  inputs[i].data.length());
     } else {
 #ifdef PADDLE_WITH_CUDA
+      platform::DeviceContextPool &pool =
+          platform::DeviceContextPool::Instance();
+      auto *dev_ctx =
+          static_cast<const platform::CUDADeviceContext *>(pool.Get(place_));
       auto dst_gpu_place = boost::get<platform::CUDAPlace>(place_);
       memory::Copy(dst_gpu_place, static_cast<void *>(input_ptr),
                    platform::CPUPlace(), inputs[i].data.data(),
-                   inputs[i].data.length(),
-                   0);  // stream 0 for sync copy
+                   inputs[i].data.length(), dev_ctx->stream());
 #else
       PADDLE_THROW("Not compile with CUDA, should not reach here.");
 #endif
paddle/fluid/inference/api/api_impl.cc
@@ -208,11 +208,14 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
                  inputs[i].data.length());
     } else {
 #ifdef PADDLE_WITH_CUDA
+      platform::DeviceContextPool &pool =
+          platform::DeviceContextPool::Instance();
+      auto *dev_ctx =
+          static_cast<const platform::CUDADeviceContext *>(pool.Get(place_));
       auto dst_gpu_place = boost::get<platform::CUDAPlace>(place_);
       memory::Copy(dst_gpu_place, static_cast<void *>(input_ptr),
                    platform::CPUPlace(), inputs[i].data.data(),
-                   inputs[i].data.length(),
-                   0);  // stream 0 for sync copy
+                   inputs[i].data.length(), dev_ctx->stream());
 #else
       PADDLE_THROW("Not compile with CUDA, should not reach here.");
 #endif
paddle/fluid/inference/tests/api/CMakeLists.txt
@@ -75,6 +75,11 @@ set(LAC_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/lac")
 download_model_and_data(${LAC_INSTALL_DIR} "lac_model.tar.gz" "lac_data.txt.tar.gz")
 inference_analysis_api_test(test_analyzer_lac ${LAC_INSTALL_DIR} analyzer_lac_tester.cc)

+# MM DNN
+set(MM_DNN_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/mm_dnn")
+download_model_and_data(${MM_DNN_INSTALL_DIR} "MM_DNN_model.tar.gz" "MM_DNN_data.txt.tar.gz")
+inference_analysis_api_test(test_analyzer_mm_dnn ${MM_DNN_INSTALL_DIR} analyzer_mm_dnn_tester.cc)
+
 # text_classification
 set(TEXT_CLASSIFICATION_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/text_classification")
 download_model_and_data(${TEXT_CLASSIFICATION_INSTALL_DIR} "text-classification-Senta.tar.gz" "text_classification_data.txt.tar.gz")
@@ -103,6 +108,10 @@ inference_analysis_api_test_with_refer_result(test_analyzer_mobilenet_transpose
 inference_analysis_api_test_with_fake_data(test_analyzer_resnet50
     "${INFERENCE_DEMO_INSTALL_DIR}/resnet50" analyzer_resnet50_tester.cc "resnet50_model.tar.gz")

+# seq_pool1
+inference_analysis_api_test_with_fake_data(test_analyzer_seq_pool1
+    "${INFERENCE_DEMO_INSTALL_DIR}/seq_pool1" analyzer_seq_pool1_tester.cc "seq_pool1.tar.gz")
+
 # mobilenet with depthwise_conv op
 inference_analysis_api_test_with_fake_data(test_analyzer_mobilenet_depthwise_conv
     "${INFERENCE_DEMO_INSTALL_DIR}/mobilenet_depthwise_conv" analyzer_resnet50_tester.cc "mobilenet_model.tar.gz")
paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc  (new file, 0 → 100644)  View file @ 8ed02339

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/inference/tests/api/tester_helper.h"

namespace paddle {
namespace inference {

using contrib::AnalysisConfig;

struct DataRecord {
  std::vector<std::vector<int64_t>> query_data_all, title_data_all;
  std::vector<size_t> lod1, lod2;
  size_t batch_iter{0};
  size_t batch_size{1};
  size_t num_samples;  // total number of samples
  DataRecord() = default;
  explicit DataRecord(const std::string &path, int batch_size = 1)
      : batch_size(batch_size) {
    Load(path);
  }
  DataRecord NextBatch() {
    DataRecord data;
    size_t batch_end = batch_iter + batch_size;
    // NOTE skip the final batch, if no enough data is provided.
    if (batch_end <= query_data_all.size()) {
      data.query_data_all.assign(query_data_all.begin() + batch_iter,
                                 query_data_all.begin() + batch_end);
      data.title_data_all.assign(title_data_all.begin() + batch_iter,
                                 title_data_all.begin() + batch_end);
      // Prepare LoDs
      data.lod1.push_back(0);
      data.lod2.push_back(0);
      CHECK(!data.query_data_all.empty());
      CHECK(!data.title_data_all.empty());
      CHECK_EQ(data.query_data_all.size(), data.title_data_all.size());
      for (size_t j = 0; j < data.query_data_all.size(); j++) {
        // calculate lod
        data.lod1.push_back(data.lod1.back() + data.query_data_all[j].size());
        data.lod2.push_back(data.lod2.back() + data.title_data_all[j].size());
      }
    }
    batch_iter += batch_size;
    return data;
  }
  void Load(const std::string &path) {
    std::ifstream file(path);
    std::string line;
    int num_lines = 0;
    while (std::getline(file, line)) {
      num_lines++;
      std::vector<std::string> data;
      split(line, '\t', &data);
      // load query data
      std::vector<int64_t> query_data;
      split_to_int64(data[0], ' ', &query_data);
      // load title data
      std::vector<int64_t> title_data;
      split_to_int64(data[1], ' ', &title_data);
      query_data_all.push_back(std::move(query_data));
      title_data_all.push_back(std::move(title_data));
    }
    num_samples = num_lines;
  }
};

void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
                   int batch_size) {
  PaddleTensor lod_query_tensor, lod_title_tensor;
  lod_query_tensor.name = "left";
  lod_title_tensor.name = "right";
  auto one_batch = data->NextBatch();
  int size1 = one_batch.lod1[one_batch.lod1.size() - 1];  // token batch size
  int size2 = one_batch.lod2[one_batch.lod2.size() - 1];  // token batch size
  lod_query_tensor.shape.assign({size1, 1});
  lod_query_tensor.lod.assign({one_batch.lod1});
  lod_title_tensor.shape.assign({size2, 1});
  lod_title_tensor.lod.assign({one_batch.lod2});
  // assign data
  TensorAssignData<int64_t>(&lod_query_tensor, one_batch.query_data_all);
  TensorAssignData<int64_t>(&lod_title_tensor, one_batch.title_data_all);
  // Set inputs.
  input_slots->assign({lod_query_tensor, lod_title_tensor});
  for (auto &tensor : *input_slots) {
    tensor.dtype = PaddleDType::INT64;
  }
}

void SetConfig(contrib::AnalysisConfig *cfg) {
  cfg->model_dir = FLAGS_infer_model;
  cfg->use_gpu = false;
  cfg->device = 0;
  cfg->specify_input_name = true;
  cfg->enable_ir_optim = true;
}

void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
  DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
  std::vector<PaddleTensor> input_slots;
  int epoch = FLAGS_test_all_data ? data.num_samples / FLAGS_batch_size : 1;
  LOG(INFO) << "number of samples: " << epoch * FLAGS_batch_size;
  for (int bid = 0; bid < epoch; ++bid) {
    PrepareInputs(&input_slots, &data, FLAGS_batch_size);
    (*inputs).emplace_back(input_slots);
  }
}

// Easy for profiling independently.
TEST(Analyzer_MM_DNN, profile) {
  contrib::AnalysisConfig cfg;
  SetConfig(&cfg);
  std::vector<PaddleTensor> outputs;

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
                 input_slots_all, &outputs, FLAGS_num_threads);

  if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
    PADDLE_ENFORCE_EQ(outputs.size(), 2UL);
    for (auto &output : outputs) {
      size_t size = GetSize(output);
      PADDLE_ENFORCE_GT(size, 0);
      float *result = static_cast<float *>(output.data.data());
      // output is probability, which is in (-1, 1).
      for (size_t i = 0; i < size; i++) {
        EXPECT_GT(result[i], -1);
        EXPECT_LT(result[i], 1);
      }
    }
  }
}

// Check the fuse status
TEST(Analyzer_MM_DNN, fuse_statis) {
  contrib::AnalysisConfig cfg;
  SetConfig(&cfg);

  int num_ops;
  auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
  auto fuse_statis = GetFuseStatis(
      static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
}

// Compare result of NativeConfig and AnalysisConfig
TEST(Analyzer_MM_DNN, compare) {
  contrib::AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  CompareNativeAndAnalysis(
      reinterpret_cast<const PaddlePredictor::Config *>(&cfg), input_slots_all);
}

// Compare Deterministic result
TEST(Analyzer_MM_DNN, compare_determine) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  CompareDeterministic(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
                       input_slots_all);
}

}  // namespace inference
}  // namespace paddle
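For readers unfamiliar with how the LoD vectors above encode variable-length sequences, here is a minimal standalone sketch of the same cumulative-offset construction that DataRecord::NextBatch performs; the sequence lengths are made up for illustration and nothing here is part of the tester itself.

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Hypothetical token ids for three queries of different lengths.
  std::vector<std::vector<int64_t>> query_data_all = {{1, 2, 3}, {4, 5}, {6, 7, 8, 9}};

  // Build the LoD exactly as NextBatch() does: a running sum of lengths,
  // starting at 0, so lod[i]..lod[i+1] is the token range of sample i.
  std::vector<size_t> lod{0};
  for (const auto &seq : query_data_all) {
    lod.push_back(lod.back() + seq.size());
  }

  // lod is now {0, 3, 5, 9}; the flattened tensor holds 9 tokens in total,
  // which is why PrepareInputs uses lod.back() as the tensor's first dimension.
  for (size_t v : lod) std::cout << v << ' ';
  std::cout << '\n';
  return 0;
}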
paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc  (new file, 0 → 100644)  View file @ 8ed02339

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <fstream>
#include <iostream>
#include "paddle/fluid/inference/tests/api/tester_helper.h"

namespace paddle {
namespace inference {
namespace analysis {

void SetConfig(AnalysisConfig *cfg) {
  cfg->param_file = FLAGS_infer_model + "/params";
  cfg->prog_file = FLAGS_infer_model + "/model";
  cfg->use_gpu = false;
  cfg->device = 0;
  cfg->enable_ir_optim = true;
  cfg->specify_input_name = true;
  cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads);
}

void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
  std::vector<std::string> feed_names = {
      "slot10000_embed", "slot10001_embed", "slot10004_embed", "slot10005_embed",
      "slot10008_embed", "slot10009_embed", "slot10012_embed", "slot10013_embed",
      "slot10108_embed", "slot13324_embed", "slot13325_embed", "slot13326_embed",
      "slot13327_embed", "slot13328_embed", "slot13329_embed", "slot13330_embed",
      "slot13331_embed", "slot15501_embed", "slot15502_embed", "slot15503_embed",
      "slot15504_embed", "slot15505_embed", "slot15506_embed", "slot15507_embed",
      "slot15508_embed", "slot15516_embed", "slot15519_embed", "slot15523_embed",
      "slot15531_embed", "slot15533_embed", "slot15548_embed", "slot15564_embed",
      "slot15565_embed", "slot15566_embed", "slot15570_embed", "slot15571_embed",
      "slot15572_embed", "slot15573_embed", "slot15574_embed", "slot15575_embed",
      "slot15576_embed", "slot15577_embed", "slot15579_embed", "slot15581_embed",
      "slot15582_embed", "slot15583_embed", "slot15584_embed", "slot5016_embed",
      "slot5021_embed",  "slot6002_embed",  "slot6003_embed",  "slot6004_embed",
      "slot6005_embed",  "slot6006_embed",  "slot6007_embed",  "slot6008_embed",
      "slot6009_embed",  "slot6011_embed",  "slot6014_embed",  "slot6015_embed",
      "slot6023_embed",  "slot6024_embed",  "slot6025_embed",  "slot6027_embed",
      "slot6029_embed",  "slot6031_embed",  "slot6034_embed",  "slot6035_embed",
      "slot6036_embed",  "slot6037_embed",  "slot6039_embed",  "slot6048_embed",
      "slot6050_embed",  "slot6058_embed",  "slot6059_embed",  "slot6060_embed",
      "slot6066_embed",  "slot6067_embed",  "slot6068_embed",  "slot6069_embed",
      "slot6070_embed",  "slot6071_embed",  "slot6072_embed",  "slot6073_embed",
      "slot6182_embed",  "slot6183_embed",  "slot6184_embed",  "slot6185_embed",
      "slot6186_embed",  "slot6188_embed",  "slot6189_embed",  "slot6190_embed",
      "slot6201_embed",  "slot6202_embed",  "slot6203_embed",  "slot6247_embed",
      "slot6248_embed",  "slot6250_embed",  "slot6251_embed",  "slot6807_embed",
      "slot6808_embed",  "slot6809_embed",  "slot6810_embed",  "slot6811_embed",
      "slot6812_embed",  "slot6813_embed",  "slot6814_embed",  "slot6815_embed",
      "slot6816_embed",  "slot6817_embed",  "slot6818_embed",  "slot6819_embed",
      "slot6820_embed",  "slot6822_embed",  "slot6823_embed",  "slot6826_embed",
      "slot7002_embed",  "slot7003_embed",  "slot7004_embed",  "slot7005_embed",
      "slot7006_embed",  "slot7008_embed",  "slot7009_embed",  "slot7010_embed",
      "slot7011_embed",  "slot7013_embed",  "slot7014_embed",  "slot7015_embed",
      "slot7016_embed",  "slot7017_embed",  "slot7019_embed",  "slot7100_embed",
      "slot7506_embed",  "slot7507_embed",  "slot7514_embed",  "slot7515_embed",
      "slot7516_embed"};
  SetFakeImageInput(inputs, FLAGS_infer_model, true, "model", "params",
                    &feed_names);
}

// Easy for profiling independently.
void profile(bool use_mkldnn = false) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  if (use_mkldnn) {
    cfg.EnableMKLDNN();
  }
  std::vector<PaddleTensor> outputs;

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
                 input_slots_all, &outputs, FLAGS_num_threads);
}

TEST(Analyzer_seq_pool1, profile) { profile(); }

// Check the fuse status
TEST(Analyzer_seq_pool1, fuse_statis) {
  AnalysisConfig cfg;
  SetConfig(&cfg);
  int num_ops;
  auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
  auto fuse_statis = GetFuseStatis(
      static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
  LOG(INFO) << "num_ops: " << num_ops;
  EXPECT_EQ(num_ops, 314);
}

}  // namespace analysis
}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/tests/api/tester_helper.h  View file @ 8ed02339

...
@@ -132,7 +132,8 @@ std::unordered_map<std::string, int> GetFuseStatis(PaddlePredictor *predictor,
 void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
                        const std::string &dirname, bool is_combined = true,
                        std::string model_filename = "model",
-                       std::string params_filename = "params") {
+                       std::string params_filename = "params",
+                       const std::vector<std::string> *feed_names = nullptr) {
   // Set fake_image_data
   PADDLE_ENFORCE_EQ(FLAGS_test_all_data, 0, "Only have single batch of data.");
   std::vector<std::vector<int64_t>> feed_target_shapes = GetFeedTargetShapes(
...
@@ -146,26 +147,32 @@ void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
     os << "}\n";
   }
   LOG(INFO) << os.str();
-  int dim1 = feed_target_shapes[0][1];
-  int dim2 = feed_target_shapes[0][2];
-  int dim3 = feed_target_shapes[0][3];
-
-  PaddleTensor input;
-  std::vector<int> shape({FLAGS_batch_size, dim1, dim2, dim3});
-  input.shape = shape;
-  input.dtype = PaddleDType::FLOAT32;
-
-  size_t size = FLAGS_batch_size * dim1 * dim2 * dim3;
-  input.data.Resize(size * sizeof(float));
-  float *input_data = static_cast<float *>(input.data.data());
-  // fill input data, for profile easily, do not use random data here.
-  for (size_t i = 0; i < size; i++) {
-    *(input_data + i) = static_cast<float>(i) / size;
-  }
-
-  std::vector<PaddleTensor> input_slots;
-  input_slots.assign({input});
+  if (feed_names) {
+    PADDLE_ENFORCE_EQ(feed_names->size(), feed_target_shapes.size());
+  }
+  std::vector<PaddleTensor> input_slots(feed_target_shapes.size());
+  for (size_t i = 0; i < feed_target_shapes.size(); ++i) {
+    const auto &feed_shape = feed_target_shapes[i];
+    auto &input = input_slots[i];
+    std::vector<int> shape({FLAGS_batch_size});
+    for (size_t s = 1; s < feed_shape.size(); ++s) {
+      shape.push_back(static_cast<int>(feed_shape[s]));
+    }
+    if (feed_names) {
+      input.name = (*feed_names)[i];
+    }
+    input.shape = shape;
+    input.dtype = PaddleDType::FLOAT32;
+    size_t len = std::accumulate(shape.begin(), shape.end(), 1,
+                                 [](int a, int b) { return a * b; });
+    input.data.Resize(len * sizeof(float));
+    input.lod.assign({{0, static_cast<size_t>(FLAGS_batch_size)}});
+    float *input_data = static_cast<float *>(input.data.data());
+    // fill input data, for profile easily, do not use random data here.
+    for (size_t j = 0; j < len; ++j) {
+      *(input_data + j) = static_cast<float>(j) / len;
+    }
+  }
   (*inputs).emplace_back(input_slots);
 }
...
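The new branch of SetFakeImageInput builds one tensor per feed and derives each tensor's element count from the model's reported shape instead of assuming a single NCHW image. A minimal sketch of that shape handling follows; the feed shapes and batch size are invented for illustration.

#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  // Hypothetical feed shapes as the analysis pass might report them,
  // where the first dimension (-1) stands for the batch size.
  std::vector<std::vector<int64_t>> feed_target_shapes = {{-1, 9}, {-1, 16}};
  const int batch_size = 4;

  for (const auto &feed_shape : feed_target_shapes) {
    // Rebuild the concrete shape the same way the new code path does:
    // keep the batch size and append every remaining model dimension.
    std::vector<int> shape({batch_size});
    for (size_t s = 1; s < feed_shape.size(); ++s) {
      shape.push_back(static_cast<int>(feed_shape[s]));
    }
    // Element count = product of all dimensions, as computed via std::accumulate.
    size_t len = std::accumulate(shape.begin(), shape.end(), 1,
                                 [](int a, int b) { return a * b; });
    std::cout << "len = " << len << '\n';  // prints 36 and 64 for the shapes above
  }
  return 0;
}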
paddle/fluid/operators/conv_op.h  View file @ 8ed02339

...
@@ -18,11 +18,11 @@ limitations under the License. */
 #include <vector>
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/operators/math/blas.h"
 #include "paddle/fluid/operators/math/depthwise_conv.h"
 #include "paddle/fluid/operators/math/im2col.h"
 #include "paddle/fluid/operators/math/vol2col.h"
-#include "paddle/fluid/platform/create_tensor_with_allocationptr.h"

 namespace paddle {
 namespace operators {
...
@@ -161,10 +161,7 @@ class GemmConvKernel : public framework::OpKernel<T> {
       auto tmp_allocation_ptr =
           platform::DeviceTemporaryAllocator::Instance().Get(dev_ctx).Allocate(
               framework::product(col_shape) * sizeof(T));
-      Tensor tep_tensor =
-          platform::GetTensor<T>(std::move(tmp_allocation_ptr), col_shape);
-      col.ShareDataWith(tep_tensor);
+      col = framework::GetTensor<T>(std::move(tmp_allocation_ptr), col_shape);
       col_matrix.ShareDataWith(col);
       col_matrix.Resize(col_matrix_shape);
     }
...
@@ -299,10 +296,7 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
       auto tmp_allocation_ptr =
           platform::DeviceTemporaryAllocator::Instance().Get(dev_ctx).Allocate(
               framework::product(col_shape) * sizeof(T));
-      Tensor tep_tensor =
-          platform::GetTensor<T>(std::move(tmp_allocation_ptr), col_shape);
-      col.ShareDataWith(tep_tensor);
+      col = framework::GetTensor<T>(std::move(tmp_allocation_ptr), col_shape);
       col_matrix.ShareDataWith(col);
       col_matrix.Resize(col_matrix_shape);
     }
...
paddle/fluid/operators/dequantize_mkldnn_op.cc  (new file, 0 → 100644)  View file @ 8ed02339

/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "mkldnn.hpp"
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/dequantize_op.h"
#include "paddle/fluid/platform/mkldnn_helper.h"

namespace paddle {
namespace operators {

using mkldnn::memory;
using mkldnn::primitive;
using mkldnn::reorder;
using platform::to_void_cast;
using Tensor = framework::Tensor;
using framework::DataLayout;
using mkldnn::stream;
using platform::GetMKLDNNFormat;

template <typename T>
class DeQuantOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* input = ctx.Input<Tensor>("Input");
    auto scale_data = ctx.Attr<float>("Scale");
    auto* output = ctx.Output<Tensor>("Output");
    auto& dev_ctx =
        ctx.template device_context<platform::MKLDNNDeviceContext>();
    const auto& engine = dev_ctx.GetEngine();

    const T* input_data = input->data<T>();
    float* output_data = output->mutable_data<float>(ctx.GetPlace());
    std::vector<float> reorder_scale = {1.0f / scale_data};

    std::vector<primitive> pipeline;
    std::vector<int> src_tz = paddle::framework::vectorize2int(input->dims());
    std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());
    mkldnn::memory::data_type src_dt =
        paddle::framework::ToMKLDNNDataType(input->type());
    mkldnn::memory::format src_fmt = input->format();

    mkldnn::primitive_attr attri;
    int mask = 0;
    attri.set_output_scales(mask, reorder_scale);

    auto src_md = platform::MKLDNNMemDesc({src_tz}, src_dt, src_fmt);
    auto src_pd = mkldnn::memory::primitive_desc(src_md, engine);
    auto src_memory =
        std::make_shared<mkldnn::memory>(src_pd, to_void_cast<T>(input_data));
    std::shared_ptr<primitive::at> src_memory_p =
        std::shared_ptr<primitive::at>(new primitive::at(*src_memory));

    auto dst_md = platform::MKLDNNMemDesc({dst_tz}, memory::data_type::f32,
                                          memory::format::nchw);
    auto dst_pd = mkldnn::memory::primitive_desc(dst_md, engine);
    auto dst_memory = mkldnn::memory(dst_pd, to_void_cast<float>(output_data));

    auto reorder_pd = std::shared_ptr<reorder::primitive_desc>(
        new reorder::primitive_desc(src_pd, dst_pd, attri));
    auto reorder_p = std::shared_ptr<reorder>(
        new reorder(*reorder_pd, *src_memory_p, dst_memory));
    pipeline.push_back(*reorder_p);
    stream(stream::kind::eager).submit(pipeline).wait();

    output->set_format(GetMKLDNNFormat(dst_memory));
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

REGISTER_OP_KERNEL(dequantize, MKLDNN, ::paddle::platform::CPUPlace,
                   ops::DeQuantOpKernel<uint8_t>, ops::DeQuantOpKernel<int8_t>);
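Setting the reorder's output scale to 1/Scale makes the MKL-DNN reorder numerically equivalent to dividing every quantized integer by Scale. The loop below is only a reference illustration of that arithmetic, not the operator's actual execution path.

#include <cstdint>
#include <iostream>
#include <vector>

// Reference semantics of the dequantize kernel above: with the reorder's
// output scale set to 1/Scale, each int8 value x becomes x / Scale as a float.
std::vector<float> DequantizeReference(const std::vector<int8_t>& in,
                                       float scale) {
  std::vector<float> out;
  out.reserve(in.size());
  for (int8_t x : in) {
    out.push_back(static_cast<float>(x) / scale);
  }
  return out;
}

int main() {
  auto out = DequantizeReference({-128, -64, 0, 64, 127}, 127.0f);
  for (float v : out) std::cout << v << ' ';  // roughly -1.008 -0.504 0 0.504 1
  std::cout << '\n';
  return 0;
}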
paddle/fluid/operators/dequantize_op.cc  (new file, 0 → 100644)  View file @ 8ed02339

/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/dequantize_op.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif

namespace paddle {
namespace operators {

framework::OpKernelType DeQuantOp::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  framework::LibraryType library_ = framework::LibraryType::kMKLDNN;
  framework::DataLayout layout_ = framework::DataLayout::kMKLDNN;

  return framework::OpKernelType(ctx.Input<Tensor>("Input")->type(),
                                 ctx.GetPlace(), layout_, library_);
}

void DeQuantOpMaker::Make() {
  AddInput("Input", "input data");
  AddOutput("Output", "output data");
  AddAttr<float>("Scale", "scale data").SetDefault({1.0f});
  AddComment(R"DOC(This op will dequantize data from INT8 to FP32)DOC");
}

}  // namespace operators
}  // namespace paddle
namespace ops = paddle::operators;

REGISTER_OPERATOR(dequantize, ops::DeQuantOp, ops::DeQuantOpMaker,
                  paddle::framework::DefaultGradOpDescMaker<true>);
paddle/fluid/operators/dequantize_op.h  (new file, 0 → 100644)  View file @ 8ed02339

/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace operators {

using framework::OpKernelType;
using framework::Tensor;

class DeQuantOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    ctx->SetOutputDim("Output", ctx->GetInputDim("Input"));
    ctx->ShareLoD("Input", /*->*/ "Output");
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override;
};

class DeQuantOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override;
};

class DeQuantGradOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {}
};

}  // namespace operators
}  // namespace paddle
paddle/fluid/operators/detection/density_prior_box_op.cu  View file @ 8ed02339

...
@@ -142,12 +142,13 @@ class DensityPriorBoxOpCUDAKernel : public framework::OpKernel<T> {
     vars->mutable_data<T>(ctx.GetPlace());

     framework::Tensor d_temp;
-    framework::TensorCopySync(h_temp, ctx.GetPlace(), &d_temp);
+    framework::TensorCopy(h_temp, ctx.GetPlace(), &d_temp);

     // At least use 32 threads, at most 512 threads.
     // blockx is multiple of 32.
     int blockx = std::min(
-        static_cast<long>(((feature_width * num_priors + 31) >> 5) << 5), 512L);
+        static_cast<int64_t>(((feature_width * num_priors + 31) >> 5) << 5),
+        512L);
     int gridx = (feature_width * num_priors + blockx - 1) / blockx;
     dim3 threads(blockx, 1);
     dim3 grids(gridx, feature_height);
...
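The expression ((n + 31) >> 5) << 5 rounds n up to the next multiple of 32 before the block width is clamped to 512 threads. A small standalone check of that bit trick, with arbitrarily chosen values:

#include <algorithm>
#include <cstdint>
#include <iostream>

int main() {
  for (int64_t n : {1, 31, 32, 33, 1000}) {
    // Round n up to a multiple of 32 (add 31, drop the low 5 bits),
    // then clamp the CUDA launch width to at most 512 threads.
    int64_t rounded = ((n + 31) >> 5) << 5;
    int64_t blockx = std::min<int64_t>(rounded, 512);
    std::cout << n << " -> " << blockx << '\n';  // 32, 32, 32, 64, 512
  }
  return 0;
}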
paddle/fluid/operators/elementwise/elementwise_div_op.cu  View file @ 8ed02339

...
@@ -12,18 +12,23 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/operators/elementwise/elementwise_div_op.h"
+#include "paddle/fluid/platform/float16.h"

 namespace ops = paddle::operators;

 REGISTER_OP_CUDA_KERNEL(
     elementwise_div,
     ops::ElementwiseDivKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::ElementwiseDivKernel<paddle::platform::CUDADeviceContext,
+                              paddle::platform::float16>,
     ops::ElementwiseDivKernel<paddle::platform::CUDADeviceContext, double>,
     ops::ElementwiseDivKernel<paddle::platform::CUDADeviceContext, int>,
     ops::ElementwiseDivKernel<paddle::platform::CUDADeviceContext, int64_t>);
 REGISTER_OP_CUDA_KERNEL(
     elementwise_div_grad,
     ops::ElementwiseDivGradKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::ElementwiseDivGradKernel<paddle::platform::CUDADeviceContext,
+                                  paddle::platform::float16>,
     ops::ElementwiseDivGradKernel<paddle::platform::CUDADeviceContext, double>,
     ops::ElementwiseDivGradKernel<paddle::platform::CUDADeviceContext, int>,
     ops::ElementwiseDivGradKernel<paddle::platform::CUDADeviceContext,
...
paddle/fluid/operators/elementwise/elementwise_mul_op.cu  View file @ 8ed02339

...
@@ -12,19 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/operators/elementwise/elementwise_mul_op.h"
+#include "paddle/fluid/platform/float16.h"

 namespace ops = paddle::operators;
+namespace plat = paddle::platform;

 REGISTER_OP_CUDA_KERNEL(
-    elementwise_mul,
-    ops::ElementwiseMulKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::ElementwiseMulKernel<paddle::platform::CUDADeviceContext, double>,
-    ops::ElementwiseMulKernel<paddle::platform::CUDADeviceContext, int>,
-    ops::ElementwiseMulKernel<paddle::platform::CUDADeviceContext, int64_t>);
+    elementwise_mul, ops::ElementwiseMulKernel<plat::CUDADeviceContext, float>,
+    ops::ElementwiseMulKernel<plat::CUDADeviceContext, double>,
+    ops::ElementwiseMulKernel<plat::CUDADeviceContext, int>,
+    ops::ElementwiseMulKernel<plat::CUDADeviceContext, int64_t>,
+    ops::ElementwiseMulKernel<plat::CUDADeviceContext, plat::float16>);
 REGISTER_OP_CUDA_KERNEL(
     elementwise_mul_grad,
-    ops::ElementwiseMulGradKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::ElementwiseMulGradKernel<paddle::platform::CUDADeviceContext, double>,
-    ops::ElementwiseMulGradKernel<paddle::platform::CUDADeviceContext, int>,
-    ops::ElementwiseMulGradKernel<paddle::platform::CUDADeviceContext,
-                                  int64_t>);
+    ops::ElementwiseMulGradKernel<plat::CUDADeviceContext, float>,
+    ops::ElementwiseMulGradKernel<plat::CUDADeviceContext, double>,
+    ops::ElementwiseMulGradKernel<plat::CUDADeviceContext, int>,
+    ops::ElementwiseMulGradKernel<plat::CUDADeviceContext, int64_t>,
+    ops::ElementwiseMulGradKernel<plat::CUDADeviceContext, plat::float16>);
paddle/fluid/operators/fill_zeros_like_op.cu.cc  View file @ 8ed02339

...
@@ -14,6 +14,7 @@ limitations under the License. */

 #include "paddle/fluid/operators/fill_zeros_like_op.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/platform/float16.h"

 namespace ops = paddle::operators;
 REGISTER_OP_CUDA_KERNEL(
...
@@ -22,4 +23,6 @@ REGISTER_OP_CUDA_KERNEL(
     ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext, int64_t>,
     ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext, float>,
     ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext,
+                             paddle::platform::float16>,
     ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext, bool>);
paddle/fluid/operators/math/concat_and_split.cu  View file @ 8ed02339

...
@@ -131,8 +131,9 @@ class ConcatFunctor<platform::CUDADeviceContext, T> {
     int in_col = input[0].numel() / in_row;
     int out_row = in_row, out_col = 0;

-    std::vector<T*> inputs_data(in_num);
+    std::vector<const T*> inputs_data;
     std::vector<int> inputs_col(in_num + 1);
+    inputs_data.reserve(in_num);

     inputs_col[0] = 0;
     bool sameShape = true;
...
@@ -143,7 +144,7 @@ class ConcatFunctor<platform::CUDADeviceContext, T> {
       }
       out_col += t_cols;
       inputs_col[i + 1] = out_col;
-      inputs_data[i] = const_cast<T*>(input[i].data<T>());
+      inputs_data.emplace_back(input[i].data<T>());
     }

     // computation
...
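The change above keeps the gathered device pointers const instead of casting constness away. A minimal sketch of the same pattern on a toy type (not the framework's Tensor API) showing why reserve plus emplace_back replaces the pre-sized vector and const_cast:

#include <vector>

// Toy stand-in for a tensor that, like framework::Tensor on a const input,
// only exposes a const data pointer.
struct ConstTensor {
  std::vector<float> values;
  const float* data() const { return values.data(); }
};

// Collect per-input pointers without casting away constness.
// Reserving first avoids reallocation while emplacing inside the loop.
std::vector<const float*> GatherPointers(const std::vector<ConstTensor>& inputs) {
  std::vector<const float*> inputs_data;
  inputs_data.reserve(inputs.size());
  for (const auto& t : inputs) {
    inputs_data.emplace_back(t.data());  // no const_cast needed
  }
  return inputs_data;
}

int main() {
  std::vector<ConstTensor> inputs = {{{1.f, 2.f}}, {{3.f}}};
  auto ptrs = GatherPointers(inputs);
  return ptrs.size() == 2 ? 0 : 1;
}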
paddle/fluid/operators/math/selected_rows_functor.cc  View file @ 8ed02339

...
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

+#include <algorithm>
 #include <set>
 #include <unordered_map>
...
@@ -252,23 +253,26 @@ elementwise_add_to(const DeviceContext& ctx, BlasT<DeviceContext, T>* blas,
 template <typename T>
 struct MergeAdd<platform::CPUDeviceContext, T> {
   framework::SelectedRows operator()(const platform::CPUDeviceContext& context,
-                                     const framework::SelectedRows& input) {
+                                     const framework::SelectedRows& input,
+                                     const bool sorted_result = false) {
     framework::SelectedRows out;
-    (*this)(context, input, &out);
+    (*this)(context, input, &out, sorted_result);
     return out;
   }

   void operator()(const platform::CPUDeviceContext& context,
                   const framework::SelectedRows& input,
-                  framework::SelectedRows* output) {
+                  framework::SelectedRows* output,
+                  const bool sorted_result = false) {
     std::vector<const framework::SelectedRows*> inputs;
     inputs.push_back(&input);
-    (*this)(context, inputs, output);
+    (*this)(context, inputs, output, sorted_result);
   }

   void operator()(const platform::CPUDeviceContext& context,
                   const std::vector<const framework::SelectedRows*>& inputs,
-                  framework::SelectedRows* output) {
+                  framework::SelectedRows* output,
+                  const bool sorted_result = false) {
     if (inputs.size() == 0) {
       VLOG(3) << "no input! return";
       return;
...
@@ -301,6 +305,9 @@ struct MergeAdd<platform::CPUDeviceContext, T> {
     }
     std::vector<int64_t> merge_rows(merged_row_set.begin(),
                                     merged_row_set.end());
+    if (sorted_result) {
+      std::sort(merge_rows.begin(), merge_rows.end());
+    }
     std::unordered_map<int64_t, size_t> rows_to_id;
     for (size_t i = 0; i < merge_rows.size(); ++i) {
       rows_to_id[merge_rows[i]] = i;
...
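MergeAdd collapses duplicated row ids of a SelectedRows object by summing their values; the new sorted_result flag additionally sorts the surviving row ids. The sketch below mirrors that behavior on simplified types (parallel arrays instead of the real SelectedRows API) and is only illustrative.

#include <algorithm>
#include <cstdint>
#include <unordered_map>
#include <vector>

// Simplified stand-in for SelectedRows: parallel arrays of row ids and
// one value per row.
struct SparseRows {
  std::vector<int64_t> rows;
  std::vector<float> values;
};

// Merge duplicated rows by adding their values; optionally return the
// merged rows in ascending order, mirroring the new sorted_result flag.
SparseRows MergeAddReference(const SparseRows& in, bool sorted_result) {
  std::unordered_map<int64_t, float> acc;
  for (size_t i = 0; i < in.rows.size(); ++i) acc[in.rows[i]] += in.values[i];

  std::vector<int64_t> merge_rows;
  merge_rows.reserve(acc.size());
  for (const auto& kv : acc) merge_rows.push_back(kv.first);
  if (sorted_result) {
    std::sort(merge_rows.begin(), merge_rows.end());
  }

  SparseRows out;
  for (int64_t r : merge_rows) {
    out.rows.push_back(r);
    out.values.push_back(acc[r]);
  }
  return out;
}

int main() {
  SparseRows grad{{4, 1, 4}, {0.5f, 1.0f, 0.25f}};
  SparseRows merged = MergeAddReference(grad, /*sorted_result=*/true);
  // merged.rows == {1, 4}, merged.values == {1.0, 0.75}
  return merged.rows.size() == 2 ? 0 : 1;
}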
paddle/fluid/operators/math/selected_rows_functor.cu  View file @ 8ed02339

...
@@ -266,7 +266,8 @@ __global__ void MergeAddKernel(const T* input, const int64_t* input_rows,
 template <typename T>
 struct MergeAdd<platform::CUDADeviceContext, T> {
   framework::SelectedRows operator()(const platform::CUDADeviceContext& context,
-                                     const framework::SelectedRows& input) {
+                                     const framework::SelectedRows& input,
+                                     const bool sorted_result = false) {
     framework::SelectedRows out;
     (*this)(context, input, &out);
     return out;
...
@@ -274,7 +275,8 @@ struct MergeAdd<platform::CUDADeviceContext, T> {
   void operator()(const platform::CUDADeviceContext& context,
                   const framework::SelectedRows& input,
-                  framework::SelectedRows* output) {
+                  framework::SelectedRows* output,
+                  const bool sorted_result = false) {
     framework::Vector<int64_t> input_rows(input.rows());
     if (input_rows.size() == 0) {
       return;
...
@@ -312,7 +314,8 @@ struct MergeAdd<platform::CUDADeviceContext, T> {
   void operator()(const platform::CUDADeviceContext& context,
                   const std::vector<const framework::SelectedRows*>& inputs,
-                  framework::SelectedRows* output) {
+                  framework::SelectedRows* output,
+                  const bool sorted_result = false) {
     if (inputs.size() == 0) {
       VLOG(3) << "no input! return";
       return;
...
paddle/fluid/operators/math/selected_rows_functor.h  View file @ 8ed02339

...
@@ -81,13 +81,16 @@ struct MergeAdd {
   // unary functor, merge by adding duplicated rows in
   // the input SelectedRows object.
   framework::SelectedRows operator()(const DeviceContext& context,
-                                     const framework::SelectedRows& input);
+                                     const framework::SelectedRows& input,
+                                     const bool sorted_result = false);
   void operator()(const DeviceContext& context,
                   const framework::SelectedRows& input,
-                  framework::SelectedRows* output);
+                  framework::SelectedRows* output,
+                  const bool sorted_result = false);
   void operator()(const DeviceContext& context,
                   const std::vector<const framework::SelectedRows*>& inputs,
-                  framework::SelectedRows* output);
+                  framework::SelectedRows* output,
+                  const bool sorted_result = false);
 };

 enum class ScatterOps { ASSIGN, ADD, SUB, SUBBY, MUL, DIV, DIVBY };
...
paddle/fluid/operators/metrics/accuracy_op.cu  View file @ 8ed02339

...
@@ -16,6 +16,7 @@ limitations under the License. */
 #include <thrust/reduce.h>
 #include "paddle/fluid/operators/metrics/accuracy_op.h"
 #include "paddle/fluid/platform/cuda_primitives.h"
+#include "paddle/fluid/platform/float16.h"
 #include "paddle/fluid/platform/gpu_info.h"

 namespace paddle {
...
@@ -94,6 +95,7 @@ class AccuracyOpCUDAKernel : public framework::OpKernel<T> {
 // FIXME(typhoonzero): types of T is for inference data.
 // label data is always int64
-REGISTER_OP_CUDA_KERNEL(accuracy,
-                        paddle::operators::AccuracyOpCUDAKernel<float>,
-                        paddle::operators::AccuracyOpCUDAKernel<double>);
+REGISTER_OP_CUDA_KERNEL(
+    accuracy, paddle::operators::AccuracyOpCUDAKernel<float>,
+    paddle::operators::AccuracyOpCUDAKernel<double>,
+    paddle::operators::AccuracyOpCUDAKernel<paddle::platform::float16>);
paddle/fluid/operators/ngraph/ngraph_ops.h  View file @ 8ed02339

...
@@ -22,4 +22,6 @@ limitations under the License. */
 #pragma once

 #include "ops/binary_unnary_op.h"
+#include "ops/fill_constant_op.h"
 #include "ops/mul_op.h"
+#include "ops/top_k_op.h"
paddle/fluid/operators/ngraph/ops/binary_unnary_op.h  View file @ 8ed02339

...
@@ -45,7 +45,6 @@ static void BuildUnaryNode(
   auto out = std::make_shared<T>(input);
   paddle::platform::SetOutputNode(op, "Out", out, ngb_node_map);
 }
 }  // namespace ngraphs
 }  // namespace operators
 }  // namespace paddle
...
paddle/fluid/operators/ngraph/ops/fill_constant_op.h  (new file, 0 → 100644)  View file @ 8ed02339

/*Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#ifdef PADDLE_WITH_NGRAPH
#pragma once

#include <string>
#include <vector>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/platform/ngraph_helper.h"

namespace paddle {
namespace operators {
namespace ngraphs {

void BuildFillConstantNode(
    const std::shared_ptr<paddle::framework::OperatorBase>& op,
    std::shared_ptr<
        std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
        ngb_node_map) {
  auto op_attrs = paddle::framework::AttrReader(op->Attrs());
  auto vsp = op_attrs.Get<std::vector<int64_t>>("shape");
  ngraph::Shape shape;
  for (auto& sp : vsp) {
    shape.push_back(sp);
  }
  float value = op_attrs.Get<float>("value");
  ngraph::element::Type ng_dtype;
  auto data_type = static_cast<paddle::framework::proto::VarType::Type>(
      op_attrs.Get<int>("dtype"));
  if (data_type == paddle::framework::proto::VarType::FP32) {
    ng_dtype = ngraph::element::f32;
  } else if (data_type == paddle::framework::proto::VarType::FP64) {
    ng_dtype = ngraph::element::f64;
  } else if (data_type == paddle::framework::proto::VarType::INT64) {
    ng_dtype = ngraph::element::i64;
  } else if (data_type == paddle::framework::proto::VarType::INT32) {
    ng_dtype = ngraph::element::i32;
  } else if (data_type == paddle::framework::proto::VarType::BOOL) {
    ng_dtype = ngraph::element::boolean;
  } else {
    PADDLE_THROW("unsupported data type: %s", data_type);
  }
  auto out = ngraph::op::Constant::create(ng_dtype, shape, {value});
  paddle::platform::SetOutputNode(op, "Out", out, ngb_node_map);
}
}  // namespace ngraphs
}  // namespace operators
}  // namespace paddle

#endif
paddle/fluid/operators/ngraph/ops/top_k_op.h  (new file, 0 → 100644)  View file @ 8ed02339

/*Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#ifdef PADDLE_WITH_NGRAPH
#pragma once

#include <string>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/platform/ngraph_helper.h"

namespace paddle {
namespace operators {
namespace ngraphs {

void BuildTopKNode(
    const std::shared_ptr<paddle::framework::OperatorBase>& op,
    std::shared_ptr<
        std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
        ngb_node_map) {
  auto op_attrs = paddle::framework::AttrReader(op->Attrs());
  int k = op_attrs.Get<int>("k");
  auto input = paddle::platform::GetInputNode(op, "X", ngb_node_map);
  auto top_k = std::make_shared<ngraph::op::TopK>(
      input, input->get_shape().size() - 1, ngraph::element::i64, k);
  std::shared_ptr<ngraph::Node> indices =
      std::make_shared<ngraph::op::GetOutputElement>(top_k, 0);
  std::shared_ptr<ngraph::Node> out =
      std::make_shared<ngraph::op::GetOutputElement>(top_k, 1);
  auto dummy_out = paddle::platform::GetOutputNode(op, "Out", ngb_node_map);
  if (dummy_out && dummy_out->get_element_type() != out->get_element_type()) {
    out = std::make_shared<ngraph::op::Convert>(out,
                                                dummy_out->get_element_type());
  }
  paddle::platform::SetOutputNode(op, "Indices", indices, ngb_node_map);
  paddle::platform::SetOutputNode(op, "Out", out, ngb_node_map);
}
}  // namespace ngraphs
}  // namespace operators
}  // namespace paddle

#endif
paddle/fluid/operators/optimizers/adam_op.h  View file @ 8ed02339

...
@@ -157,8 +157,11 @@ struct AdamFunctor<T, CPUAdam> {
   }
 };

+template <typename T, typename Flavour>
+struct SparseAdamFunctor;
+
 template <typename T>
-struct SparseAdamFunctor {
+struct SparseAdamFunctor<T, GPUAdam> {
   T beta1_;
   T beta2_;
   T epsilon_;
...
@@ -236,6 +239,106 @@ struct SparseAdamFunctor {
   }
 };

+template <typename T>
+struct SparseAdamFunctor<T, CPUAdam> {
+  T beta1_;
+  T beta2_;
+  T epsilon_;
+
+  const T* beta1_pow_;
+  const T* beta2_pow_;
+  const T* moment1_;
+  T* moment1_out_;
+  const T* moment2_;
+  T* moment2_out_;
+  const T* lr_;
+  const T* grad_;
+  const T* param_;
+  T* param_out_;
+
+  const int64_t* rows_;
+  int64_t row_numel_;
+  int64_t row_count_;
+
+  SparseAdamFunctor(T beta1, T beta2, T epsilon, const T* beta1_pow,
+                    const T* beta2_pow, const T* mom1, T* mom1_out,
+                    const T* mom2, T* mom2_out, const T* lr, const T* grad,
+                    const T* param, T* param_out, const int64_t* rows,
+                    int64_t row_numel, int64_t row_count, bool lazy_mode)
+      : beta1_(beta1),
+        beta2_(beta2),
+        epsilon_(epsilon),
+        beta1_pow_(beta1_pow),
+        beta2_pow_(beta2_pow),
+        moment1_(mom1),
+        moment1_out_(mom1_out),
+        moment2_(mom2),
+        moment2_out_(mom2_out),
+        lr_(lr),
+        grad_(grad),
+        param_(param),
+        param_out_(param_out),
+        rows_(rows),
+        row_numel_(row_numel),
+        row_count_(row_count) {}
+
+  inline HOSTDEVICE void adam_update(size_t i, T g) const {
+    // The following code is the same as dense
+    T mom1 = moment1_[i];
+    T mom2 = moment2_[i];
+    T lr = *lr_;
+    T beta1_pow = *beta1_pow_;
+    T beta2_pow = *beta2_pow_;
+    T p = param_[i];
+
+    // Calculation
+    lr *= sqrt(1 - beta2_pow) / (1 - beta1_pow);
+
+    mom1 = beta1_ * mom1 + (1 - beta1_) * g;
+    mom2 = beta2_ * mom2 + (1 - beta2_) * g * g;
+    p -= lr * (mom1 / (sqrt(mom2) + epsilon_));
+
+    // Write back to global memory
+    moment1_out_[i] = mom1;
+    moment2_out_[i] = mom2;
+    param_out_[i] = p;
+  }
+
+  inline void operator()(size_t numel) const {
+    // lr could be reuse
+    T lr = *lr_;
+    T beta1_pow = *beta1_pow_;
+    T beta2_pow = *beta2_pow_;
+    lr *= sqrt(1 - beta2_pow) / (1 - beta1_pow);
+    size_t row_count = numel / row_numel_;
+
+    for (size_t i = 0U, j = 0U; i != row_count; ++i) {
+      if (i == *(rows_ + j)) {
+        for (size_t k = 0U; k != row_numel_; ++k) {
+          T g = grad_[j * row_numel_ + k];
+          adam_update(i * row_numel_ + k, g);
+        }
+        ++j;
+      } else {
+        for (size_t k = 0U; k != row_numel_; ++k) {
+          T mom1 = moment1_[i * row_numel_ + k];
+          T mom2 = moment2_[i * row_numel_ + k];
+          T p = param_[i * row_numel_ + k];
+
+          mom1 = beta1_ * mom1;
+          mom2 = beta2_ * mom2;
+
+          p -= lr * (mom1 / (sqrt(mom2) + epsilon_));
+          // Write back to global memory
+          moment1_out_[i * row_numel_ + k] = mom1;
+          moment2_out_[i * row_numel_ + k] = mom2;
+          param_out_[i * row_numel_ + k] = p;
+        }
+      }
+    }
+  }
+};
+
 template <typename DeviceContext, typename T>
 class AdamOpKernel : public framework::OpKernel<T> {
  public:
...
@@ -331,7 +434,7 @@ class AdamOpKernel : public framework::OpKernel<T> {
                 .Var()
                 ->GetMutable<framework::SelectedRows>();
         merge_func(ctx.template device_context<DeviceContext>(), grad,
-                   grad_merge_var);
+                   grad_merge_var, true);
         grad_merge_ptr = grad_merge_var;
       }
...
@@ -347,32 +450,46 @@ class AdamOpKernel : public framework::OpKernel<T> {
       } else {
 #endif
         rows = grad_merge.rows().data();
 #if defined(PADDLE_WITH_CUDA)
       }
 #endif
       auto row_numel = grad_tensor.numel() / grad_merge.rows().size();

-      SparseAdamFunctor<T> functor(
-          beta1, beta2, epsilon, beta1_pow.template data<T>(),
-          beta2_pow.template data<T>(), mom1.template data<T>(),
-          mom1_out.template mutable_data<T>(ctx.GetPlace()),
-          mom2.template data<T>(),
-          mom2_out.template mutable_data<T>(ctx.GetPlace()),
-          lr.template data<T>(), grad_data, param.template data<T>(),
-          param_out.template mutable_data<T>(ctx.GetPlace()), rows, row_numel,
-          grad_merge.rows().size(), lazy_mode);
-      VLOG(3) << "lazy_mode :" << lazy_mode;
-      if (lazy_mode && platform::is_cpu_place(ctx.GetPlace())) {
-        size_t row_count = grad_merge.rows().size();
-        std::vector<int64_t> cpu_rows(grad_merge.rows());
-        for (size_t row_index = 0; row_index < row_count; ++row_index) {
-          for (size_t offset = 0; offset < row_numel; ++offset) {
-            size_t i = cpu_rows[row_index] * row_numel + offset;
-            functor.adam_update(i, grad_data[row_index * row_numel + offset]);
-          }
-        }
-      } else {
+      if (platform::is_cpu_place(ctx.GetPlace())) {
+        SparseAdamFunctor<T, CPUAdam> functor(
+            beta1, beta2, epsilon, beta1_pow.template data<T>(),
+            beta2_pow.template data<T>(), mom1.template data<T>(),
+            mom1_out.template mutable_data<T>(ctx.GetPlace()),
+            mom2.template data<T>(),
+            mom2_out.template mutable_data<T>(ctx.GetPlace()),
+            lr.template data<T>(), grad_data, param.template data<T>(),
+            param_out.template mutable_data<T>(ctx.GetPlace()), rows, row_numel,
+            grad_merge.rows().size(), lazy_mode);
+        if (lazy_mode) {
+          size_t row_count = grad_merge.rows().size();
+          std::vector<int64_t> cpu_rows(grad_merge.rows());
+          for (size_t row_index = 0; row_index < row_count; ++row_index) {
+            for (size_t offset = 0; offset < row_numel; ++offset) {
+              size_t i = cpu_rows[row_index] * row_numel + offset;
+              functor.adam_update(i, grad_data[row_index * row_numel + offset]);
+            }
+          }
+        } else {
+          functor(param.numel());
+        }
+      } else if (platform::is_gpu_place(ctx.GetPlace())) {
+        SparseAdamFunctor<T, GPUAdam> functor(
+            beta1, beta2, epsilon, beta1_pow.template data<T>(),
+            beta2_pow.template data<T>(), mom1.template data<T>(),
+            mom1_out.template mutable_data<T>(ctx.GetPlace()),
+            mom2.template data<T>(),
+            mom2_out.template mutable_data<T>(ctx.GetPlace()),
+            lr.template data<T>(), grad_data, param.template data<T>(),
+            param_out.template mutable_data<T>(ctx.GetPlace()), rows, row_numel,
+            grad_merge.rows().size(), lazy_mode);
+
+        // FIXME(minqiyang): remove BinarySearch in GPU later
         platform::ForRange<DeviceContext> for_range(
             static_cast<const DeviceContext&>(ctx.device_context()),
             param.numel());
...
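The CPU sparse functor above walks every parameter row in order and advances its sparse-row index j only when row i actually has a gradient row. The standalone sketch below illustrates that two-pointer traversal on plain arrays; the data is made up, the Adam arithmetic itself is omitted, and a bounds check on j is added that the real functor does not need because it stops at numel.

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Hypothetical sparse gradient: only parameter rows 1 and 3 carry updates.
  const std::vector<int64_t> rows = {1, 3};      // sorted row ids with gradients
  const int64_t row_numel = 2;                   // elements per row
  const std::vector<float> grad = {0.1f, 0.2f,   // gradient rows, flattened:
                                   0.3f, 0.4f};  // row 1, then row 3
  const int64_t row_count = 5;                   // total parameter rows

  // Same walk as SparseAdamFunctor<T, CPUAdam>::operator(): rows with a
  // gradient get the full Adam update, the rest only decay their moments.
  for (int64_t i = 0, j = 0; i != row_count; ++i) {
    if (j < static_cast<int64_t>(rows.size()) && i == rows[j]) {
      for (int64_t k = 0; k != row_numel; ++k) {
        std::cout << "row " << i << " elem " << k
                  << ": full Adam update with g=" << grad[j * row_numel + k] << '\n';
      }
      ++j;
    } else {
      std::cout << "row " << i << ": no gradient, only decay the moments\n";
    }
  }
  return 0;
}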
paddle/fluid/operators/optimizers/momentum_op.cu  View file @ 8ed02339

...
@@ -14,8 +14,11 @@ limitations under the License. */

 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/optimizers/momentum_op.h"
+#include "paddle/fluid/platform/float16.h"

 namespace ops = paddle::operators;
 REGISTER_OP_CUDA_KERNEL(
     momentum, ops::MomentumOpKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::MomentumOpKernel<paddle::platform::CUDADeviceContext, double>);
+    ops::MomentumOpKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::MomentumOpKernel<paddle::platform::CUDADeviceContext,
+                          paddle::platform::float16>);
paddle/fluid/operators/optimizers/momentum_op.h  View file @ 8ed02339

...
@@ -237,7 +237,8 @@ class SparseMomentumFunctor<T, UseNesterov> {
   inline HOSTDEVICE void operator()(size_t i) {
     auto row_idx =
         math::BinarySearch<int64_t>(rows_, row_height_, i / row_numel_);
-    T g = row_idx >= 0 ? g_[row_idx * row_numel_ + i % row_numel_] : 0;
+    T g = row_idx >= 0 ? g_[row_idx * row_numel_ + i % row_numel_]
+                       : static_cast<T>(0);
     // put memory access in register
     const T p = p_[i];
     const T lr = lr_[0];
...
@@ -282,7 +283,8 @@ class SparseMomentumFunctor<T, NoNesterov> {
   inline HOSTDEVICE void operator()(size_t i) {
     auto row_idx =
         math::BinarySearch<int64_t>(rows_, row_height_, i / row_numel_);
-    T g = row_idx >= 0 ? g_[row_idx * row_numel_ + i % row_numel_] : 0;
+    T g = row_idx >= 0 ? g_[row_idx * row_numel_ + i % row_numel_]
+                       : static_cast<T>(0);
     // put memory access in register
     const T p = p_[i];
     const T lr = lr_[0];
...
paddle/fluid/operators/quantize_mkldnn_op.cc
0 → 100644
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "mkldnn.hpp"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/quantize_op.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"

namespace paddle {
namespace operators {

using mkldnn::memory;
using mkldnn::primitive;
using mkldnn::reorder;
using platform::to_void_cast;
using Tensor = framework::Tensor;
using framework::DataLayout;
using mkldnn::stream;
using platform::GetMKLDNNFormat;

template <typename T>
class QuantOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* input = ctx.Input<Tensor>("Input");
    auto scale_data = ctx.Attr<float>("Scale");
    auto* output = ctx.Output<Tensor>("Output");
    auto& dev_ctx =
        ctx.template device_context<platform::MKLDNNDeviceContext>();
    const auto& engine = dev_ctx.GetEngine();

    std::vector<primitive> pipeline;
    std::vector<int> src_tz = paddle::framework::vectorize2int(input->dims());
    std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());

    const T* input_data = input->data<T>();

    mkldnn::primitive_attr attri;
    int mask = 0;
    attri.set_output_scales(mask, {scale_data});

    auto src_md = platform::MKLDNNMemDesc({src_tz}, memory::data_type::f32,
                                          input->format());
    auto src_pd = mkldnn::memory::primitive_desc(src_md, engine);
    auto src_memory =
        std::make_shared<mkldnn::memory>(src_pd, to_void_cast<T>(input_data));
    std::shared_ptr<primitive::at> src_memory_p =
        std::shared_ptr<primitive::at>(new primitive::at(*src_memory));

    bool is_negative = ctx.Attr<bool>("is_negative_input");
    std::shared_ptr<mkldnn::memory::primitive_desc> dst_pd;
    std::shared_ptr<mkldnn::memory> dst_memory;
    if (is_negative) {
      platform::ConvMKLDNNHandler::SetDstMemory<int8_t>(
          ctx, output, dst_tz, engine, dst_pd, dst_memory);
    } else {
      platform::ConvMKLDNNHandler::SetDstMemory<uint8_t>(
          ctx, output, dst_tz, engine, dst_pd, dst_memory);
    }

    auto reorder_pd = std::shared_ptr<reorder::primitive_desc>(
        new reorder::primitive_desc(src_pd, *dst_pd, attri));
    auto reorder_p = std::shared_ptr<reorder>(
        new reorder(*reorder_pd, *src_memory_p, *dst_memory));
    pipeline.push_back(*reorder_p);
    stream(stream::kind::eager).submit(pipeline).wait();

    output->set_layout(DataLayout::kMKLDNN);
    output->set_format(GetMKLDNNFormat(*dst_memory));
  }
};
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

// TODO(Xiaoli) Support FP32->S8 quantization.

REGISTER_OP_KERNEL(quantize, MKLDNN, ::paddle::platform::CPUPlace,
                   ops::QuantOpKernel<float>);
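The kernel above delegates the actual scale-and-round work to an MKL-DNN reorder configured with set_output_scales, choosing a signed or unsigned 8-bit destination depending on is_negative_input. For intuition only, a NumPy sketch of the arithmetic it approximates follows; the exact rounding and saturation modes are MKL-DNN's, so treat this as an assumption rather than the library's definition:

import numpy as np

def quantize_fp32(x, scale, is_negative_input):
    # q = round(x * scale), saturated to int8 when the input may be negative,
    # otherwise to uint8 -- mirroring the int8_t / uint8_t branch in the kernel.
    q = np.rint(x * scale)
    if is_negative_input:
        return np.clip(q, -128, 127).astype(np.int8)
    return np.clip(q, 0, 255).astype(np.uint8)

x = np.array([0.02, -0.5, 0.97], dtype=np.float32)
print(quantize_fp32(x, scale=127.0, is_negative_input=True))   # [  3 -64 123]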
paddle/fluid/operators/quantize_op.cc
0 → 100644
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. */

#include "paddle/fluid/operators/quantize_op.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif

namespace paddle {
namespace operators {

framework::OpKernelType QuantOp::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  framework::LibraryType library_ = framework::LibraryType::kMKLDNN;
  framework::DataLayout layout_ = framework::DataLayout::kMKLDNN;

  return framework::OpKernelType(ctx.Input<Tensor>("Input")->type(),
                                 ctx.GetPlace(), layout_, library_);
}

void QuantOpMaker::Make() {
  AddInput("Input", "input data");
  AddOutput("Output", "output data");
  AddAttr<bool>("is_negative_input",
                "(bool, default false) Only used in mkldnn INT8 kernel")
      .SetDefault(false);
  AddAttr<float>("Scale", "scale data").SetDefault({1.0f});
  AddComment(R"DOC(This op will quantize data from FP32 to INT8)DOC");
}
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(quantize, ops::QuantOp, ops::QuantOpMaker,
                  paddle::framework::DefaultGradOpDescMaker<true>);
paddle/fluid/operators/quantize_op.h
0 → 100644
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace operators {

using framework::OpKernelType;
using framework::Tensor;

class QuantOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    ctx->SetOutputDim("Output", ctx->GetInputDim("Input"));
    ctx->ShareLoD("Input", /*->*/ "Output");
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override;
};

class QuantOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override;
};

}  // namespace operators
}  // namespace paddle
paddle/fluid/operators/sequence_ops/sequence_mask_op.h
...
@@ -52,7 +52,7 @@ class SequenceMaskOpMaker : public framework::OpProtoAndCheckerMaker {
                 "The maximum length of the sequence. If maxlen < 0, maxlen "
                 "= max(Input(X)).")
        .SetDefault(-1)
-        .AddCustomChecker([](int& v) {
+        .AddCustomChecker([](const int& v) {
          PADDLE_ENFORCE(v < 0 || v >= 1,
                         "Attr(maxlen) must be less than 0 or larger than 1");
        });
...
paddle/fluid/operators/split_lod_tensor_op.cc
...
@@ -63,7 +63,7 @@ class SplitLoDTensorOp : public framework::OperatorBase {
    }
    auto *mask_data = cpu_mask->data<bool>();

-    std::vector<std::vector<CopyRange>> copy_ranges(mask_dim[0]);
+    std::vector<std::vector<CopyRange>> copy_ranges(2);

    // set out_true/out_false lod
    for (size_t t = 0; t < 2; t++) {
...
paddle/fluid/operators/top_k_op.cc
...
@@ -21,7 +21,7 @@ class TopkOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of TopkOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
...
@@ -44,12 +44,25 @@ class TopkOp : public framework::OperatorWithKernel {
    ctx->ShareLoD("X", "Out");
    ctx->ShareLoD("X", "Indices");
  }

+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    framework::LibraryType library_{framework::LibraryType::kPlain};
+    framework::DataLayout layout_ = framework::DataLayout::kAnyLayout;
+    return framework::OpKernelType(ctx.Input<Tensor>("X")->type(),
+                                   ctx.device_context(), layout_, library_);
+  }
};

class TopkOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X", "(Tensor) The input of Topk op");
+    AddInput("K",
+             "(Tensor) Number of top elements to look for along "
+             "the last dimension (along each row for matrices).")
+        .AsDispensable();
    AddOutput("Out", "(Tensor) The output tensor of Topk op");
    AddOutput("Indices", "(Tensor) The indices of Topk elements of input");
    AddComment(R"DOC(
...
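For intuition, a small NumPy sketch of what the new dispensable "K" input buys: k can be decided at run time, and the last dimension of Out/Indices is resized to k, which is exactly what the CPU and CUDA kernels below do. This is an illustration, not Paddle code:

import numpy as np

def top_k(x, k):
    idx = np.argsort(-x, axis=-1)[..., :k]        # indices of the k largest entries
    out = np.take_along_axis(x, idx, axis=-1)     # their values
    return out, idx

x = np.array([[1.0, 3.0, 2.0, 5.0]], dtype=np.float32)
k_t = np.array([2], dtype=np.int32)               # "K" fed as a runtime tensor
print(top_k(x, int(k_t[0])))                      # values [[5., 3.]], indices [[3, 1]]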
paddle/fluid/operators/top_k_op.cu
...
@@ -16,6 +16,7 @@ limitations under the License. */
#include "paddle/fluid/operators/top_k_op.h"
#include "paddle/fluid/platform/assert.h"
#include "paddle/fluid/platform/cuda_device_function.h"
+#include "paddle/fluid/platform/float16.h"

namespace paddle {
namespace operators {
...
@@ -150,7 +151,7 @@ __device__ __forceinline__ void ThreadGetTopK(Pair<T> topk[], int* beam,
      if (k < MaxLength - (*beam)) {
        topk[k] = topk[k + *beam];
      } else {
-        topk[k].set(-INFINITY, -1);
+        topk[k].set(-static_cast<T>(INFINITY), -1);
      }
    }
    if (!(*is_empty)) {
...
@@ -160,7 +161,7 @@ __device__ __forceinline__ void ThreadGetTopK(Pair<T> topk[], int* beam,
    }

    *max = topk[MaxLength - 1];
-    if ((*max).v == -1) *is_empty = true;
+    if ((*max).v == -static_cast<T>(1)) *is_empty = true;
    *beam = 0;
  }
}
...
@@ -181,7 +182,7 @@ __device__ __forceinline__ void ThreadGetTopK(Pair<T> topk[], int* beam,
      if (k < MaxLength - *beam) {
        topk[k] = topk[k + *beam];
      } else {
-        topk[k].set(-INFINITY, -1);
+        topk[k].set(-static_cast<T>(INFINITY), -1);
      }
    }
    if (!(*is_empty)) {
...
@@ -278,7 +279,7 @@ __global__ void KeMatrixTopK(T* output, int output_stride, int64_t* indices,
  bool firststep = true;

  for (int j = 0; j < MaxLength; j++) {
-    topk[j].set(-INFINITY, -1);
+    topk[j].set(-static_cast<T>(INFINITY), -1);
  }
  while (top_num) {
    ThreadGetTopK<T, MaxLength, BlockSize>(
...
@@ -326,6 +327,17 @@ class TopkOpCUDAKernel : public framework::OpKernel<T> {
    auto* indices = ctx.Output<Tensor>("Indices");
    size_t k = static_cast<int>(ctx.Attr<int>("k"));

+    auto* k_t = ctx.Input<Tensor>("K");
+    if (k_t) {
+      Tensor k_host;
+      framework::TensorCopySync(*k_t, platform::CPUPlace(), &k_host);
+      k = k_host.data<int>()[0];
+      framework::DDim output_dims = output->dims();
+      output_dims[output_dims.size() - 1] = k;
+      output->Resize(output_dims);
+      indices->Resize(output_dims);
+    }
+
    const T* input_data = input->data<T>();
    T* output_data = output->mutable_data<T>(ctx.GetPlace());
    // FIXME(typhoonzero): data is always converted to type T?
...
@@ -362,5 +374,7 @@ class TopkOpCUDAKernel : public framework::OpKernel<T> {
}  // namespace operators
}  // namespace paddle

-REGISTER_OP_CUDA_KERNEL(top_k, paddle::operators::TopkOpCUDAKernel<float>,
-                        paddle::operators::TopkOpCUDAKernel<double>);
+REGISTER_OP_CUDA_KERNEL(
+    top_k, paddle::operators::TopkOpCUDAKernel<float>,
+    paddle::operators::TopkOpCUDAKernel<double>,
+    paddle::operators::TopkOpCUDAKernel<paddle::platform::float16>);
paddle/fluid/operators/top_k_op.h
...
@@ -37,8 +37,16 @@ class TopkKernel : public framework::OpKernel<T> {
    auto* input = ctx.Input<Tensor>("X");
    auto* output = ctx.Output<Tensor>("Out");
    auto* indices = ctx.Output<Tensor>("Indices");
-    // k is determined by Attr
-    const size_t k = static_cast<int>(ctx.Attr<int>("k"));
+    size_t k = static_cast<int>(ctx.Attr<int>("k"));
+
+    auto* k_t = ctx.Input<Tensor>("K");
+    if (k_t) {
+      k = k_t->data<int>()[0];
+      framework::DDim output_dims = output->dims();
+      output_dims[output_dims.size() - 1] = k;
+      output->Resize(output_dims);
+      indices->Resize(output_dims);
+    }

    T* output_data = output->mutable_data<T>(ctx.GetPlace());
    int64_t* indices_data = indices->mutable_data<int64_t>(ctx.GetPlace());
...
paddle/fluid/platform/device_context.cc
...
@@ -256,10 +256,11 @@ CUDADeviceContext::CUDADeviceContext(CUDAPlace place)
  LOG_FIRST_N(WARNING, 1) << "Please NOTE: device: " << place_.device
                          << ", CUDA Capability: " << compute_capability_
-                         << ", Driver Version: " << driver_version_ / 1000
+                         << ", Driver API Version: " << driver_version_ / 1000
                          << "." << (driver_version_ % 100) / 10
-                         << ", Runtime Version: " << runtime_version_ / 1000
+                         << ", Runtime API Version: "
+                         << runtime_version_ / 1000
                          << "." << (runtime_version_ % 100) / 10;
  size_t cudnn_dso_ver = dynload::cudnnGetVersion();
  LOG_FIRST_N(WARNING, 1) << "device: " << place_.device
                          << ", cuDNN Version: " << cudnn_dso_ver / 1000 << "."
...
paddle/fluid/platform/device_context.h
...
@@ -41,7 +41,28 @@ limitations under the License. */
namespace paddle {
namespace platform {

-/*! \brief device temporary allocator singleton */
+/*! \brief device temporary allocator singleton.
+ *
+ * Some operator needs temporary memory during computation, for example,
+ * conv_gemm, which needs use col to store the result of im2col. If we
+ * create a stack memory which is used by CUDA Kernel, before the
+ * Computation(...) returns, we should add ctx->Wait(), because the
+ * execution of CUDA is async, if there doesn't have ctx->Wait(),
+ * the temporary memory will be released before the CUDA Kernel uses
+ * it.
+ *
+ * DeviceTemporaryAllocator is a singleton, which contains a
+ * `TemporaryAllocator` for each <Place, Stream>. And the TemporaryAllocator
+ * contains a temp_allocation_queue which is used to store the temporary
+ * allocations. The allocation, which is allocated by TemporaryAllocator,
+ * is a unique_ptr, and when it is not held by any variable, it will be
+ * pushed into the temp_allocation_queue. There are two opportunities to free
+ * the allocations of temp_allocation_queue:
+ * - when the Stream calls cudaStreamSynchronize;
+ * - when the allocation size of opportunities exceeds a certain threshold
+ *   (defined by FLAGS_limit_of_temporary_allocation).
+ *
+ * */
class DeviceTemporaryAllocator {
 public:
  static DeviceTemporaryAllocator& Instance() {
...
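A minimal Python sketch of the deferred-release idea described in the comment above, purely for intuition: released temporary allocations are parked in a queue and only freed on an explicit stream synchronization or once the queued size passes a threshold. The class and names here are illustrative, not Paddle's C++ API:

class TempAllocQueue:
    def __init__(self, limit_bytes):
        self.limit_bytes = limit_bytes
        self.queue = []          # allocations waiting to be freed
        self.queued_bytes = 0

    def release(self, size):
        # Called when an allocation is no longer held by any variable.
        self.queue.append(size)
        self.queued_bytes += size
        if self.queued_bytes > self.limit_bytes:
            self.flush()         # threshold-triggered free (FLAGS_limit_of_temporary_allocation)

    def flush(self):
        # Also invoked after the stream synchronizes, when freeing is safe.
        self.queue.clear()
        self.queued_bytes = 0

pool = TempAllocQueue(limit_bytes=1024)
pool.release(2048)               # exceeds the limit, so the queue is flushed at once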
paddle/fluid/platform/mkldnn_reuse.h
...
@@ -15,6 +15,7 @@ limitations under the License. */
#include <string>
#include <vector>
+#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/place.h"
...
@@ -181,6 +182,21 @@ class MKLDNNHandler {
    return dims2str(operand_dims) + suffix;
  }

+  template <typename M>
+  static void SetDstMemory(
+      const framework::ExecutionContext& ctx, framework::Tensor* output,
+      std::vector<int> dst_tz, const mkldnn::engine& engine,
+      std::shared_ptr<mkldnn::memory::primitive_desc>& dst_pd,  // NOLINT
+      std::shared_ptr<mkldnn::memory>& dst_memory) {            // NOLINT
+    M* output_data = output->mutable_data<M>(ctx.GetPlace());
+    auto dst_md = platform::MKLDNNMemDesc(
+        {dst_tz}, paddle::framework::ToMKLDNNDataType(
+                      framework::DataTypeTrait<M>::DataType),
+        mkldnn::memory::format::nhwc);
+    dst_pd.reset(new mkldnn::memory::primitive_desc(dst_md, engine));
+    dst_memory.reset(
+        new mkldnn::memory(*dst_pd, to_void_cast<M>(output_data)));
+  }
+
 protected:
  static std::string dims2str(const mkldnn::memory::dims& operand_dims) {
    std::string dstr = "";
...
paddle/fluid/platform/nccl_helper.h
...
@@ -23,6 +23,7 @@
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/platform/dynload/nccl.h"
#include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/platform/float16.h"

#define NCCL_ID_VARNAME "NCCLID"
...
@@ -38,6 +39,8 @@ inline ncclDataType_t ToNCCLDataType(framework::proto::VarType::Type type) {
    return ncclInt;
  } else if (type == framework::proto::VarType::INT64) {
    return ncclInt64;
+  } else if (type == framework::proto::VarType::FP16) {
+    return ncclFloat16;
  } else {
    PADDLE_THROW("Not supported");
  }
...
paddle/fluid/platform/temporary_allocator.h
...
@@ -29,6 +29,19 @@ class TemporaryAllocation : public memory::allocation::Allocation {
  memory::allocation::AllocationPtr underlying_allocation_;
};

+/*! \brief the TemporaryAllocator is used to alloc the temporary allocation
+ * which used by CUDA's async operation.
+ *
+ * The TemporaryAllocator contains a temp_allocation_queue which
+ * is used to store the temporary allocations. The allocation, which is
+ * allocated by TemporaryAllocator, is a unique_ptr, and when it is not held
+ * by any variable, it will be pushed into the temp_allocation_queue.
+ *
+ * There is one opportunity to free the allocations of temp_allocation_queue:
+ * - when the allocation size of opportunities exceeds a certain threshold
+ *   (defined by FLAGS_limit_of_temporary_allocation).
+ *
+ * */
class TemporaryAllocator : public memory::allocation::Allocator {
 public:
  explicit TemporaryAllocator(platform::Place place);
...
paddle/fluid/platform/temporary_allocator_test.cc
...
@@ -14,8 +14,7 @@
#include "paddle/fluid/platform/temporary_allocator.h"
#include <gtest/gtest.h>
-#include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/platform/create_tensor_with_allocationptr.h"
+#include "paddle/fluid/framework/tensor_util.h"
DECLARE_double(limit_of_temporary_allocation);

namespace paddle {
...
@@ -47,6 +46,7 @@ TEST(temporary_allocator, temporary_allocator) {
TEST(temporary_allocator, add_callback) {
#ifdef PADDLE_WITH_CUDA
+  const double limit = FLAGS_limit_of_temporary_allocation;
  FLAGS_limit_of_temporary_allocation = 10;
  platform::CUDAPlace gpu_place(0);
  TemporaryAllocator gpu_alloc(gpu_place);
...
@@ -63,7 +63,7 @@ TEST(temporary_allocator, add_callback) {
  });
  { gpu_alloc.Allocate(100); }
  PADDLE_ENFORCE(deleted);
-  FLAGS_limit_of_temporary_allocation = -1;
+  FLAGS_limit_of_temporary_allocation = limit;
#endif
}
...
@@ -75,8 +75,8 @@ TEST(temporary_allocator, create_tensor_with_allocationptr) {
    auto allocation = cpu_alloc.Allocate(memory_size);
    void* address = allocation->ptr();
    int numel = memory_size / sizeof(float);
    framework::Tensor tensor =
-        GetTensor<float>(
+        framework::GetTensor<float>(
            std::move(allocation), framework::make_ddim({numel}));
    PADDLE_ENFORCE_EQ(address, tensor.data<float>());
    PADDLE_ENFORCE_EQ(tensor.numel(), numel);
  }
...
@@ -90,8 +90,8 @@ TEST(temporary_allocator, create_tensor_with_allocationptr) {
    auto allocation = gpu_alloc.Allocate(memory_size);
    void* address = allocation->ptr();
    int numel = memory_size / sizeof(float);
    framework::Tensor tensor =
-        GetTensor<float>(
+        framework::GetTensor<float>(
            std::move(allocation), framework::make_ddim({numel}));
    PADDLE_ENFORCE_EQ(address, tensor.data<float>());
    PADDLE_ENFORCE_EQ(tensor.numel(), numel);
  }
...
@@ -116,7 +116,7 @@ TEST(temporary_allocator, create_tensor_with_allocationptr2) {
  {
    auto allocation = cpu_alloc.Allocate(memory_size);
    address = allocation->ptr();
-    framework::Tensor tensor = GetTensor<float>(
+    framework::Tensor tensor = framework::GetTensor<float>(
        std::move(allocation), framework::make_ddim({numel}));
    PADDLE_ENFORCE_EQ(address, tensor.data<float>());
    PADDLE_ENFORCE_EQ(tensor.numel(), numel);
...
@@ -138,7 +138,7 @@ TEST(temporary_allocator, create_tensor_with_allocationptr2) {
  {
    auto allocation = gpu_alloc.Allocate(memory_size);
    address = allocation->ptr();
-    framework::Tensor tensor = GetTensor<float>(
+    framework::Tensor tensor = framework::GetTensor<float>(
        std::move(allocation), framework::make_ddim({numel}));
    PADDLE_ENFORCE_EQ(address, tensor.data<float>());
    PADDLE_ENFORCE_EQ(tensor.numel(), numel);
...
paddle/fluid/pybind/CMakeLists.txt
-set(PYBIND_DEPS pybind python proto_desc memory executor async_executor prune feed_fetch_method pass_builder parallel_executor profiler layer)
+set(PYBIND_DEPS pybind python proto_desc memory executor async_executor prune feed_fetch_method pass_builder parallel_executor profiler layer scope_pool)
if(WITH_PYTHON)
  list(APPEND PYBIND_DEPS py_func_op)
endif()
...
paddle/fluid/pybind/const_value.cc
...
@@ -49,6 +49,9 @@ void BindConstValue(pybind11::module* m) {
  op_proto_and_checker_maker.def(
      "kOpNameScopeAttrName",
      framework::OpProtoAndCheckerMaker::OpNamescopeAttrName);
+  op_proto_and_checker_maker.def(
+      "kOpCreationCallstackAttrName",
+      framework::OpProtoAndCheckerMaker::OpCreationCallstackAttrName);
}
}  // namespace pybind
...
paddle/fluid/pybind/pybind.cc
...
@@ -32,6 +32,7 @@ limitations under the License. */
#include "paddle/fluid/framework/parallel_executor.h"
#include "paddle/fluid/framework/prune.h"
#include "paddle/fluid/framework/reader.h"
+#include "paddle/fluid/framework/scope_pool.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/version.h"
#include "paddle/fluid/imperative/layer.h"
...
@@ -117,6 +118,9 @@ PYBIND11_MODULE(core, m) {
    return paddle::operators::AppendPythonCallableObjectAndReturnId(py_obj);
  });

+  m.add_object("_cleanup",
+               py::capsule([]() { ScopePool::Instance().Clear(); }));
+
  py::class_<imperative::VarBase, PyVarBase>(m, "VarBase", R"DOC()DOC")
      .def(py::init<>())
      .def("_run_backward",
...
@@ -454,7 +458,7 @@ All parameter, weight, gradient are variables in Paddle.
      },
      py::return_value_policy::copy);

-  py::class_<Scope>(m, "Scope", R"DOC(
+  py::class_<Scope>(m, "_Scope", R"DOC(
    Scope is an association of a name to Variable. All variables belong to Scope.
    Variables in a parent scope can be retrieved from local scope.
...
@@ -474,17 +478,26 @@ All parameter, weight, gradient are variables in Paddle.
        param.set(param_array, place)
        )DOC")
+      .def("_remove_from_pool",
+           [](Scope& self) { ScopePool::Instance().Remove(&self); })
      .def("var",
           [](Scope& self, const std::string& name) -> Variable* {
             return self.Var(name);
           },
           py::return_value_policy::reference)
      .def("find_var", &Scope::FindVar, py::return_value_policy::reference)
-      .def(py::init<>())
      .def("new_scope", [](Scope& self) -> Scope* { return &self.NewScope(); },
           py::return_value_policy::reference)
      .def("drop_kids", &Scope::DropKids);

+  m.def("Scope",
+        []() -> Scope* {
+          auto* s = new Scope();
+          ScopePool::Instance().Insert(std::unique_ptr<Scope>(s));
+          return s;
+        },
+        py::return_value_policy::reference);
+
  //! @note: Be careful! PyBind will return std::string as an unicode, not
  //! Python str. If you want a str object, you should cast them in Python.
  m.def("get_all_op_protos", []() -> std::vector<py::bytes> {
...
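On the Python side, the net effect of the pybind changes above is that core.Scope() becomes a factory that registers the new scope in ScopePool, while the raw class is exposed as _Scope and the "_cleanup" capsule clears the pool at interpreter shutdown. A minimal, hypothetical usage sketch; the scope_guard/executor pattern is ordinary fluid usage and not part of this commit:

import paddle.fluid as fluid

scope = fluid.Scope()            # created via the pooled factory bound above
with fluid.scope_guard(scope):
    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())
# scope._remove_from_pool() would detach the scope from ScopePool explicitly;
# otherwise the "_cleanup" capsule releases it when the interpreter exits.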
paddle/scripts/installation_validate.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle.fluid as fluid
import paddle as pd

print(pd.__version__)
paddle/scripts/paddle_build.sh
...
@@ -79,6 +79,7 @@ function cmake_gen() {
            PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/Library/Frameworks/Python.framework/Versions/2.7/bin/python2.7
            -DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/2.7/include/python2.7
            -DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/2.7/lib/libpython2.7.dylib"
+            pip install --user -r ${PADDLE_ROOT}/python/requirements.txt
        else
            exit 1
        fi
...
@@ -91,6 +92,7 @@ function cmake_gen() {
            -DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.5/include/python3.5m/
            -DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.5/lib/libpython3.5m.dylib"
            WITH_FLUID_ONLY=${WITH_FLUID_ONLY:-ON}
+            pip3.5 install --user -r ${PADDLE_ROOT}/python/requirements.txt
        else
            exit 1
        fi
...
@@ -103,6 +105,7 @@ function cmake_gen() {
            -DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.6/include/python3.6m/
            -DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.6/lib/libpython3.6m.dylib"
            WITH_FLUID_ONLY=${WITH_FLUID_ONLY:-ON}
+            pip3.6 install --user -r ${PADDLE_ROOT}/python/requirements.txt
        else
            exit 1
        fi
...
@@ -115,6 +118,7 @@ function cmake_gen() {
            -DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.7/include/python3.7m/
            -DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.7/lib/libpython3.7m.dylib"
            WITH_FLUID_ONLY=${WITH_FLUID_ONLY:-ON}
+            pip3.7 install --user -r ${PADDLE_ROOT}/python/requirements.txt
        else
            exit 1
        fi
...
@@ -441,7 +445,9 @@ EOF
    # make install should also be test when unittest
    make install -j 8
    if [ "$1" == "cp27-cp27m" ]; then
+        set -e
        pip install --user ${INSTALL_PREFIX:-/paddle/build}/opt/paddle/share/wheels/*.whl
+        python ${PADDLE_ROOT}/paddle/scripts/installation_validate.py
    elif [ "$1" == "cp35-cp35m" ]; then
        pip3.5 install --user ${INSTALL_PREFIX:-/paddle/build}/opt/paddle/share/wheels/*.whl
    elif [ "$1" == "cp36-cp36m" ]; then
...
python/paddle/fluid/__init__.py
...
@@ -46,7 +46,7 @@ from . import transpiler
from . import distribute_lookup_table
from .param_attr import ParamAttr, WeightNormParamAttr
from .data_feeder import DataFeeder
-from .core import LoDTensor, LoDTensorArray, CPUPlace, CUDAPlace, CUDAPinnedPlace, Scope
+from .core import LoDTensor, LoDTensorArray, CPUPlace, CUDAPlace, CUDAPinnedPlace, Scope, _Scope
from .transpiler import DistributeTranspiler, \
    memory_optimize, release_memory, DistributeTranspilerConfig
from .lod_tensor import create_lod_tensor, create_random_int_lodtensor
...
python/paddle/fluid/contrib/__init__.py
...
@@ -22,6 +22,8 @@ from . import op_frequence
from .op_frequence import *
from . import quantize
from .quantize import *
+from . import slim
+from .slim import *
from . import utils
from .utils import *
...
@@ -30,4 +32,5 @@ __all__ += decoder.__all__
__all__ += memory_usage_calc.__all__
__all__ += op_frequence.__all__
__all__ += quantize.__all__
+__all__ += slim.__all__
__all__ += utils.__all__
python/paddle/fluid/contrib/slim/__init__.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .core import *
from .graph import *
from .prune import *
__all__ = [
    'build_compressor',
    'CompressPass',
    'ImitationGraph',
    'SensitivePruneStrategy',
    'MagnitudePruner',
    'RatioPruner',
]
python/paddle/fluid/contrib/slim/core/__init__.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from . import config
from .config import *
from . import compress_pass
from .compress_pass import *
from . import strategy
from .strategy import *
from . import pass_builder
from .pass_builder import *

__all__ = config.__all__ + compress_pass.__all__ + strategy.__all__ + pass_builder.__all__
python/paddle/fluid/contrib/slim/core/compress_pass.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ....core import CPUPlace
from ..graph import get_executor

__all__ = ['Context', 'CompressPass']


class Context(object):
    """
    The context in the process of compression.
    Args:
        exe: The executor used to execute graph.
        graph: The graph to be compressed.
        scope: The scope used to execute graph.
        program_exe: The program_exe is used to execute the program
                     created for modifying the variables in scope.
    """

    def __init__(self, exe, graph, scope, program_exe=None):
        # The total number of epoches to be trained.
        self.epoch = 0
        # Current epoch
        self.epoch_id = 0
        # Current batch
        self.batch_id = 0
        self.exe = exe
        self.graph = graph
        self.scope = scope
        self.program_exe = program_exe


class CompressPass(object):
    """
    The pass used to compress model.
    Args:
        place: The device used in compression.
        data_reader: The data_reader used to run graph.
        data_feeder: The data_feeder used to run graph.
        scope: The scope used to run graph.
        metrics: The metrics for evaluating model.
        epoch: The total epoches of trainning in compression.
        program_exe: The program_exe is used to execute the program
                     created for modifying the variables in scope.
    """

    def __init__(self,
                 place=None,
                 data_reader=None,
                 data_feeder=None,
                 scope=None,
                 metrics=None,
                 epoch=None,
                 program_exe=None):
        self.strategies = []
        self.place = CPUPlace() if place is None else place
        self.data_reader = data_reader
        self.data_feeder = data_feeder
        self.scope = scope
        self.metrics = metrics
        self.epoch = epoch
        self.program_exe = program_exe

    def add_strategy(self, strategy):
        """
        Add a strategy to current compress pass.
        Args:
            strategy: The strategy to be added into current compress pass.
        """
        self.strategies.append(strategy)
        self.epoch = max(strategy.end_epoch, self.epoch)

    def apply(self, graph):
        """
        Compress a model.
        Args:
            graph: The target graph to be compressed.
        """
        self.executor = get_executor(graph, self.place)
        context = Context(
            self.executor, graph, self.scope, program_exe=self.program_exe)

        for strategy in self.strategies:
            strategy.on_compress_begin(context)

        for epoch in range(self.epoch):

            for strategy in self.strategies:
                strategy.on_epoch_begin(context)

            for data in self.data_reader():

                for strategy in self.strategies:
                    strategy.on_batch_begin(context)
                fetches = None
                if self.metrics:
                    fetches = self.metrics.values()
                feed = None
                if self.data_feeder:
                    feed = self.data_feeder.feed(data)
                results = self.executor.run(graph,
                                            fetches=fetches,
                                            scope=self.scope,
                                            feed=feed)
                if results:
                    print("results: {}".format(
                        zip(self.metrics.keys(), results)))
                for strategy in self.strategies:
                    strategy.on_batch_end(context)
                context.batch_id += 1

            for strategy in self.strategies:
                strategy.on_epoch_end(context)
            context.epoch_id += 1

        for strategy in self.strategies:
            strategy.on_compress_end(context)
python/paddle/fluid/contrib/slim/core/config.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import inspect
import funcsigs
import yaml
from collections import OrderedDict
from ..prune import *
from .compress_pass import *
from .strategy import *

__all__ = ['ConfigFactory']
"""This factory is used to create instances by loading and parsing configure file with yaml format.
"""


class ConfigFactory(object):
    def __init__(self, config):
        """Init a factory from configure file."""
        self.instances = {}
        self.version = None
        self._parse_config(config)

    def get_compress_pass(self):
        """
        Get compress pass from factory.
        """
        return self.instance('compress_pass')

    def instance(self, name):
        """
        Get instance from factory.
        """
        if name in self.instances:
            return self.instances[name]
        else:
            return None

    def _new_instance(self, name, attrs):
        if name not in self.instances:
            class_ = globals()[attrs['class']]
            sig = funcsigs.signature(class_.__init__)
            keys = [
                param.name for param in sig.parameters.values()
                if (param.kind == param.POSITIONAL_OR_KEYWORD)
            ][1:]
            keys = set(attrs.keys()).intersection(set(keys))
            args = {}
            for key in keys:
                value = attrs[key]
                if isinstance(value, str) and value in self.instances:
                    value = self.instances[value]
                args[key] = value
            self.instances[name] = class_(**args)
        return self.instances.get(name)

    def _parse_config(self, config):
        assert config
        with open(config, 'r') as config_file:
            key_values = self._ordered_load(config_file)
            for key in key_values:
                # parse version
                if key == 'version' and self.version is None:
                    self.version = int(key_values['version'])
                assert self.version == int(key_values['version'])

                # parse pruners
                if key == 'pruners' or key == 'strategies':
                    instances = key_values[key]
                    for name in instances:
                        self._new_instance(name, instances[name])

                if key == 'compress_pass':
                    compress_pass = self._new_instance(key, key_values[key])
                    for name in key_values[key]['strategies']:
                        strategy = self.instance(name)
                        compress_pass.add_strategy(strategy)

                if key == 'include':
                    for config_file in key_values[key]:
                        self._parse_config(config_file.strip())

    def _ordered_load(self,
                      stream,
                      Loader=yaml.Loader,
                      object_pairs_hook=OrderedDict):
        """
        See: https://stackoverflow.com/questions/5121931/in-python-how-can-you-load-yaml-mappings-as-ordereddicts
        """

        class OrderedLoader(Loader):
            pass

        def construct_mapping(loader, node):
            loader.flatten_mapping(node)
            return object_pairs_hook(loader.construct_pairs(node))

        OrderedLoader.add_constructor(
            yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, construct_mapping)

        return yaml.load(stream, OrderedLoader)
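A short usage sketch, assuming a YAML file shaped like the demo config.yaml further below: the factory instantiates each named pruner and strategy from its 'class' field and wires the listed strategies into the compress pass. The import path follows the package __init__ files in this commit:

from paddle.fluid.contrib.slim.core import ConfigFactory

factory = ConfigFactory('./config.yaml')
comp_pass = factory.get_compress_pass()   # a CompressPass with its strategies attached
pruner = factory.instance('pruner_1')     # any named instance can be looked up as well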
python/paddle/fluid/contrib/slim/core/pass_builder.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .compress_pass import CompressPass
from .config import ConfigFactory

__all__ = ['build_compressor']


def build_compressor(place=None,
                     data_reader=None,
                     data_feeder=None,
                     scope=None,
                     metrics=None,
                     epoch=None,
                     config=None):
    if config is not None:
        factory = ConfigFactory(config)
        comp_pass = factory.get_compress_pass()
    else:
        comp_pass = CompressPass()
    comp_pass.place = place
    comp_pass.data_reader = data_reader
    comp_pass.data_feeder = data_feeder
    comp_pass.scope = scope
    comp_pass.metrics = metrics
    comp_pass.epoch = epoch
    return comp_pass
python/paddle/fluid/contrib/slim/core/strategy.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__all__ = ['Strategy']


class Strategy(object):
    """
    Base class for all strategies.
    """

    def __init__(self, start_epoch=0, end_epoch=10):
        """
        Args:
            start_epoch: The first epoch to apply the strategy.
            end_epoch: The last epoch to apply the strategy.
        """
        self.start_epoch = start_epoch
        self.end_epoch = end_epoch

    def on_compress_begin(self, context):
        pass

    def on_epoch_begin(self, context):
        pass

    def on_epoch_end(self, context):
        pass

    def on_batch_begin(self, context):
        pass

    def on_batch_end(self, context):
        pass

    def on_compress_end(self, context):
        pass
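A minimal sketch of a user-defined strategy built on the hooks above; the LoggingStrategy name and its behavior are illustrative and not part of this commit. CompressPass calls each hook with the Context shown earlier, so its counters are available:

from paddle.fluid.contrib.slim.core import Strategy

class LoggingStrategy(Strategy):
    def __init__(self, start_epoch=0, end_epoch=10):
        super(LoggingStrategy, self).__init__(start_epoch, end_epoch)

    def on_epoch_end(self, context):
        # context carries the running counters maintained by CompressPass.
        print("finished epoch {} ({} batches seen)".format(
            context.epoch_id, context.batch_id))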
python/paddle/fluid/contrib/slim/demo/filter_prune/config.yaml
0 → 100644
version: 1.0
pruners:
    pruner_1:
        class: 'RatioPruner'
        ratios:
            'conv1_1.w': 0.3
            'conv1_2.w': 0.4
            '*': 0.9
        group_dims:
            '*': [1, 2, 3]
        criterions:
            '*': 'l1-norm'
strategies:
    strategy_1:
        class: 'SensitivePruneStrategy'
        pruner: 'pruner_1'
        start_epoch: 0
        end_epoch: 10
        delta_rate: 0.20
        acc_loss_threshold: 0.2
        sensitivities:
            'conv1_1.w': 0.4
compress_pass:
    class: 'CompressPass'
    epoch: 100
    strategies:
        - strategy_1
python/paddle/fluid/contrib/slim/demo/filter_prune/demo.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle.fluid as fluid
import paddle
import os
import sys
from paddle.fluid.contrib.slim import CompressPass
from paddle.fluid.contrib.slim import build_compressor
from paddle.fluid.contrib.slim import ImitationGraph


class LinearModel(object):
    def __init__(slef):
        pass

    def train(self):
        train_program = fluid.Program()
        startup_program = fluid.Program()
        startup_program.random_seed = 10
        with fluid.program_guard(train_program, startup_program):
            x = fluid.layers.data(name='x', shape=[13], dtype='float32')
            y = fluid.layers.data(name='y', shape=[1], dtype='float32')
            predict = fluid.layers.fc(input=x, size=1, act=None)
            cost = fluid.layers.square_error_cost(input=predict, label=y)
            avg_cost = fluid.layers.mean(cost)
        eval_program = train_program.clone()
        sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
        sgd_optimizer.minimize(avg_cost)

        train_reader = paddle.batch(
            paddle.dataset.uci_housing.train(), batch_size=1)
        eval_reader = paddle.batch(
            paddle.dataset.uci_housing.test(), batch_size=1)
        place = fluid.CPUPlace()
        train_feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
        eval_feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
        exe = fluid.Executor(place)
        exe.run(startup_program)
        train_metrics = {"loss": avg_cost.name}
        eval_metrics = {"loss": avg_cost.name}

        graph = ImitationGraph(train_program)
        config = './config.yaml'
        comp_pass = build_compressor(
            place,
            data_reader=train_reader,
            data_feeder=train_feeder,
            scope=fluid.global_scope(),
            metrics=train_metrics,
            epoch=1,
            config=config)
        comp_pass.apply(graph)


if __name__ == "__main__":
    model = LinearModel()
    model.train()
python/paddle/fluid/contrib/slim/graph/__init__.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from . import executor
from .executor import *
from . import graph
from .graph import *
from . import graph_pass
from .graph_pass import *

__all__ = executor.__all__
__all__ += graph.__all__
__all__ += graph_pass.__all__
python/paddle/fluid/contrib/slim/graph/executor.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import abc
from abc import abstractmethod
from .... import executor
from .graph import IRGraph, ImitationGraph

__all__ = ['get_executor']


class GraphExecutor(object):
    __metaclass__ = abc.ABCMeta

    def __init__(self, place):
        self.place = place

    @abstractmethod
    def run(self, graph, feches=None, feed=None):
        pass


class IRGraphExecutor(GraphExecutor):
    def run(self, grah, fetches, feed=None):
        pass


class ImitationGraphExecutor(GraphExecutor):
    def __init__(self, place):
        super(ImitationGraphExecutor, self).__init__(place)
        self.exe = executor.Executor(place)

    def run(self, graph, scope=None, fetches=None, feed=None):
        assert isinstance(graph, ImitationGraph)
        fetch_list = None
        if fetches:
            fetch_list = [
                graph.program.global_block().var(name) for name in fetches
            ]
        results = self.exe.run(graph.program,
                               scope=scope,
                               fetch_list=fetch_list,
                               feed=feed)
        return results


def get_executor(graph, place):
    if isinstance(graph, ImitationGraph):
        return ImitationGraphExecutor(place)
    if isinstance(graph, IRGraph):
        return IRGraphExecutor(place)
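A minimal usage sketch of the executor/graph pair introduced here: wrap a fluid Program in an ImitationGraph and dispatch through get_executor. The toy program construction is illustrative; the commented run call would additionally need the startup program to have been executed first:

import paddle.fluid as fluid
from paddle.fluid.contrib.slim.graph import ImitationGraph, get_executor

program = fluid.Program()
with fluid.program_guard(program):
    x = fluid.layers.data(name='x', shape=[4], dtype='float32')
    out = fluid.layers.fc(input=x, size=2)

graph = ImitationGraph(program)
exe = get_executor(graph, fluid.CPUPlace())   # returns an ImitationGraphExecutor
# exe.run(graph, scope=fluid.global_scope(), fetches=[out.name], feed={...})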
python/paddle/fluid/contrib/slim/graph/graph.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ....framework import Program

__all__ = ['Graph', 'ImitationGraph', 'IRGraph']


class Graph(object):
    """
    Base class for all graph.
    """

    def __init__(self):
        pass

    def all_parameters(self):
        """
        Return all the parameters in current graph.
        """
        pass


class ImitationGraph(Graph):
    def __init__(self, program=None):
        super(ImitationGraph, self).__init__()
        self.program = Program() if program is None else program

    def all_parameters(self):
        return self.program.global_block().all_parameters()


class IRGraph(Graph):
    pass
python/paddle/fluid/contrib/slim/graph/graph_pass.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__all__ = ['GraphPass', 'PruneParameterPass']


class GraphPass(object):
    """
    Base class for all graph pass.
    """

    def __init__(self):
        pass

    def apply(self, graph):
        pass


class PruneParameterPass(GraphPass):
    """
    Generate a graph for pruning parameters from target graph.
    """

    def __init__(self, pruned_params, thresholds):
        super(PruneParameterPass, self).__init__()
        self.pruned_params = pruned_params
        self.thresholds = thresholds
        self.default_threshold = thresholds['*']

    def apply(self, graph):
        pass
python/paddle/fluid/contrib/slim/prune/__init__.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from . import pruner
from .pruner import *
from . import prune_strategy
from .prune_strategy import *

__all__ = pruner.__all__
__all__ += prune_strategy.__all__
python/paddle/fluid/contrib/slim/prune/prune_strategy.py
0 → 100644
浏览文件 @
8ed02339
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ..core.strategy import Strategy
from ....framework import Program, program_guard
from .... import layers
import numpy as np

__all__ = ['SensitivePruneStrategy', 'PruneStrategy']


class SensitivePruneStrategy(Strategy):
    def __init__(self,
                 pruner=None,
                 start_epoch=0,
                 end_epoch=10,
                 delta_rate=0.20,
                 acc_loss_threshold=0.2,
                 sensitivities=None):
        super(SensitivePruneStrategy, self).__init__(start_epoch, end_epoch)
        self.pruner = pruner
        self.delta_rate = delta_rate
        self.acc_loss_threshold = acc_loss_threshold
        self.sensitivities = sensitivities


class PruneStrategy(Strategy):
    """
    The strategy that prunes weights by threshold or ratio iteratively.
    """

    def __init__(self,
                 pruner,
                 mini_batch_pruning_frequency=1,
                 start_epoch=0,
                 end_epoch=10):
        super(PruneStrategy, self).__init__(start_epoch, end_epoch)
        self.pruner = pruner
        self.mini_batch_pruning_frequency = mini_batch_pruning_frequency

    def _trigger(self, context):
        return (context.batch_id % self.mini_batch_pruning_frequency == 0 and
                self.start_epoch <= context.epoch_id < self.end_epoch)

    def on_batch_end(self, context):
        if self._trigger(context):
            prune_program = Program()
            with program_guard(prune_program):
                for param in context.graph.all_parameters():
                    prune_program.global_block().clone_variable(param)
                    p = prune_program.global_block().var(param.name)
                    zeros_mask = self.pruner.prune(p)
                    pruned_param = p * zeros_mask
                    layers.assign(input=pruned_param, output=param)
            context.program_exe.run(prune_program, scope=context.scope)
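For orientation, a hedged sketch of the context object that on_batch_end expects. The attribute names (batch_id, epoch_id, graph, scope, program_exe) mirror what the code above reads; the class itself and the commented driver loop are illustrative stand-ins, not the actual compress-pass implementation.

# Illustrative stand-in for the context supplied by the compression pass.
class _Context(object):
    def __init__(self, graph, scope, program_exe):
        self.graph = graph              # e.g. an ImitationGraph over the train program
        self.scope = scope              # scope holding the parameter tensors
        self.program_exe = program_exe  # fluid.Executor used to run the prune program
        self.epoch_id = 0
        self.batch_id = 0

# Hypothetical driver loop (RatioPruner is defined in pruner.py below):
#   strategy = PruneStrategy(RatioPruner({'*': 0.7}), mini_batch_pruning_frequency=10)
#   for context.epoch_id in range(num_epochs):
#       for context.batch_id, data in enumerate(train_reader()):
#           exe.run(main_program, feed=feeder.feed(data))
#           strategy.on_batch_end(context)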
python/paddle/fluid/contrib/slim/prune/pruner.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from .... import layers

__all__ = ['Pruner', 'MagnitudePruner', 'RatioPruner']


class Pruner(object):
    """
    Base class of all pruners.
    """

    def __init__(self):
        pass

    def prune(self, param):
        pass


class MagnitudePruner(Pruner):
    """
    Pruner used to prune a parameter by threshold.
    """

    def __init__(self, threshold):
        self.threshold = threshold

    def prune(self, param, threshold=None):
        if threshold is None:
            thres = layers.fill_constant(
                shape=[1], dtype='float32', value=self.threshold)
        else:
            thres = threshold
        zeros_mask = layers.less_than(x=param, y=thres)
        return zeros_mask


class RatioPruner(Pruner):
    """
    Pruner used to prune a parameter by ratio.
    """

    def __init__(self, ratios=None):
        """
        Args:
            ratios: dict with pairs (parameter_name, pruned_ratio).
        """
        self.ratios = ratios

    def prune(self, param, ratio=None):
        """
        Args:
            ratio: `ratio=40%` means pruning (1 - 40%) of the weights to zero.
        """
        if ratio is None:
            rat = self.ratios[
                param.name] if param.name in self.ratios else self.ratios['*']
        else:
            rat = ratio
        if rat < 1.0:
            k = max(int(rat * np.prod(param.shape)), 1)
            param_vec = layers.reshape(x=param, shape=[1, -1])
            param_topk, _ = layers.topk(param_vec, k=k)
            threshold = layers.slice(
                param_topk, axes=[1], starts=[-1], ends=[k])
            threshold = layers.reshape(x=threshold, shape=[1])
            zeros_mask = layers.less_than(x=param, y=threshold)
        else:
            zeros_mask = layers.ones(param.shape)
        return zeros_mask
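As a NumPy stand-in for the fluid ops above (illustrative only, not part of the diff), this is the mask that RatioPruner.prune builds: take the k-th largest value as a threshold and mark every entry below it with 1.

# NumPy analogue of RatioPruner.prune; `w` and `rat` are made-up inputs.
import numpy as np

def ratio_mask(w, rat):
    k = max(int(rat * w.size), 1)
    flat = np.sort(w.reshape(-1))[::-1]      # values in descending order
    threshold = flat[k - 1]                  # k-th largest value
    return (w < threshold).astype(w.dtype)   # 1 where w is below the threshold

w = np.random.rand(4, 4).astype('float32')
mask = ratio_mask(w, 0.25)
pruned = w * mask   # entries at or above the threshold are zeroed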
python/paddle/fluid/contrib/slim/unitest/__init__.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
python/paddle/fluid/contrib/slim/unitest/configs/config.yaml
0 → 100644
version: 1.0
include: ["./unitest/configs/pruners.yaml", "./unitest/configs/pruners_0.yaml"]
pruners:
    pruner_1:
        class: 'RatioPruner'
        ratios:
            'conv1_1.w': 0.3
            'conv1_2.w': 0.4
            '*': 0.9
        group_dims:
            '*': [1, 2, 3]
        criterions:
            '*': 'l1-norm'
strategies:
    strategy_1:
        class: 'SensitivePruneStrategy'
        pruner: 'pruner_2'
        start_epoch: 0
        end_epoch: 10
        delta_rate: 0.20
        acc_loss_threshold: 0.2
        sensitivities:
            'conv1_1.w': 0.4
compress_pass:
    class: 'CompressPass'
    epoch: 100
    strategies:
        - strategy_1
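Conceptually, and this is a hedged sketch rather than the actual ConfigFactory code, each pruners/strategies entry resolves its class field to a constructor and passes the remaining keys as keyword arguments, so strategy_1 ends up holding the pruner_2 instance defined in the included ./unitest/configs/pruners.yaml shown below. Keys such as group_dims and criterions are dropped here because RatioPruner does not take them.

# Roughly what the parsed config resolves to (illustrative only).
pruner_2 = RatioPruner(ratios={'conv1_1.w': 0.5, 'conv1_2.w': 0.2, '*': 0.7})
strategy_1 = SensitivePruneStrategy(
    pruner=pruner_2,
    start_epoch=0,
    end_epoch=10,
    delta_rate=0.20,
    acc_loss_threshold=0.2,
    sensitivities={'conv1_1.w': 0.4})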
python/paddle/fluid/contrib/slim/unitest/configs/pruners.yaml
0 → 100644
version: 1.0
pruners:
    pruner_2:
        class: 'RatioPruner'
        ratios:
            'conv1_1.w': 0.5
            'conv1_2.w': 0.2
            '*': 0.7
        group_dims:
            '*': [1, 2, 3]
        criterions:
            '*': 'l1-norm'
python/paddle/fluid/contrib/slim/unitest/configs/pruners_0.yaml
0 → 100644
version: 1.0
pruners:
    pruner_3:
        class: 'RatioPruner'
        ratios:
            'conv1_1.w': 0.5
            'conv1_2.w': 0.2
            '*': 0.7
        group_dims:
            '*': [1, 2, 3]
        criterions:
            '*': 'l1-norm'
python/paddle/fluid/contrib/slim/unitest/test_factory.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.fluid.contrib.slim import ConfigFactory
import unittest


class TestFactory(unittest.TestCase):
    def test_parse(self):
        factory = ConfigFactory('./unitest/configs/config.yaml')

        pruner = factory.instance('pruner_1')
        self.assertEquals(pruner.ratios['conv1_1.w'], 0.3)

        pruner = factory.instance('pruner_2')
        self.assertEquals(pruner.ratios['*'], 0.7)

        strategy = factory.instance('strategy_1')
        pruner = strategy.pruner
        self.assertEquals(pruner.ratios['*'], 0.7)

        compress_pass = factory.get_compress_pass()
        self.assertEquals(compress_pass.epoch, 100)

        strategy = compress_pass.strategies[0]
        self.assertEquals(strategy.delta_rate, 0.2)


if __name__ == '__main__':
    unittest.main()
python/paddle/fluid/data_feeder.py
...
@@ -44,6 +44,8 @@ class DataToLoDTensorConverter(object):
             self.dtype = 'int64'
         elif dtype == core.VarDesc.VarType.FP64:
             self.dtype = 'float64'
+        elif dtype == core.VarDesc.VarType.FP16:
+            self.dtype = 'float16'
         elif dtype == core.VarDesc.VarType.INT32:
             self.dtype = 'int32'
         elif dtype == core.VarDesc.VarType.UINT8:
...
python/paddle/fluid/executor.py
...
@@ -191,7 +191,7 @@ def _fetch_var(name, scope=None, return_numpy=True):
     assert isinstance(name, str)
     if scope is None:
         scope = global_scope()
-    assert isinstance(scope, core.Scope)
+    assert isinstance(scope, core._Scope)
     var = scope.find_var(name)
     assert var is not None, (
...
python/paddle/fluid/framework.py
...
@@ -20,6 +20,7 @@ import os
 import re
 import six
 import sys
+import traceback
 import numpy as np
...
@@ -604,6 +605,10 @@ class Operator(object):
         if role_var_name in op_attrs and len(op_attrs[role_var_name]) == 0:
             del op_attrs[role_var_name]
 
+        callstack_var_name = op_maker.kOpCreationCallstackAttrName()
+        op_attrs[callstack_var_name] = list(
+            reversed(traceback.format_stack()))[1:]
+
         if len(self.desc.type()) != 0:
             return
         if type is None:
...
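For intuition, a hedged, standalone snippet showing what list(reversed(traceback.format_stack()))[1:] holds: the creation call stack with the most recent call site first, minus the frame of the function that captured it. The function name below is made up.

# Illustrative only: what the op-creation callstack attribute ends up holding.
import traceback

def capture_callstack():
    # Most recent call site first; [1:] drops capture_callstack's own frame.
    return list(reversed(traceback.format_stack()))[1:]

for frame in capture_callstack()[:2]:
    print(frame.strip())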
python/paddle/fluid/initializer.py
...
@@ -18,6 +18,7 @@ from . import framework
 import numpy as np
 import contextlib
 from .core import VarDesc
+from . import unique_name
 
 __all__ = [
     'Constant', 'Uniform', 'Normal', 'TruncatedNormal', 'Xavier', 'Bilinear',
...
@@ -207,16 +208,39 @@ class UniformInitializer(Initializer):
         # Initialization Ops should be prepended and not appended
         if self._seed == 0:
             self._seed = block.program.random_seed
+
+        # to be compatible with fp16 initializers
+        if var.dtype == VarDesc.VarType.FP16:
+            out_dtype = VarDesc.VarType.FP32
+            out_var = block.create_var(
+                name=unique_name.generate(".".join(['gaussian_random', 'tmp'])),
+                shape=var.shape,
+                dtype=out_dtype,
+                type=VarDesc.VarType.LOD_TENSOR,
+                persistable=False)
+        else:
+            out_dtype = var.dtype
+            out_var = var
+
         op = block._prepend_op(
             type="uniform_random",
-            outputs={"Out": var},
+            outputs={"Out": out_var},
             attrs={
                 "shape": var.shape,
-                "dtype": int(var.dtype),
+                "dtype": out_dtype,
                 "min": self._low,
                 "max": self._high,
                 "seed": self._seed
             })
+
+        if var.dtype == VarDesc.VarType.FP16:
+            block.append_op(
+                type="cast",
+                inputs={"X": out_var},
+                outputs={"Out": var},
+                attrs={"in_dtype": out_var.dtype,
+                       "out_dtype": var.dtype})
+
         var.op = op
         return op
...
@@ -261,17 +285,39 @@ class NormalInitializer(Initializer):
         # Initialization Ops should be prepended and not appended
         if self._seed == 0:
             self._seed = block.program.random_seed
+
+        # to be compatible with fp16 initializers
+        if var.dtype == VarDesc.VarType.FP16:
+            out_dtype = VarDesc.VarType.FP32
+            out_var = block.create_var(
+                name=unique_name.generate(".".join(['gaussian_random', 'tmp'])),
+                shape=var.shape,
+                dtype=out_dtype,
+                type=VarDesc.VarType.LOD_TENSOR,
+                persistable=False)
+        else:
+            out_dtype = var.dtype
+            out_var = var
+
         op = block._prepend_op(
             type="gaussian_random",
-            outputs={"Out": var},
+            outputs={"Out": out_var},
             attrs={
                 "shape": var.shape,
-                "dtype": int(var.dtype),
+                "dtype": out_dtype,
                 "mean": self._mean,
                 "std": self._std_dev,
                 "seed": self._seed,
                 "use_mkldnn": False
             })
+
+        if var.dtype == VarDesc.VarType.FP16:
+            block.append_op(
+                type="cast",
+                inputs={"X": out_var},
+                outputs={"Out": var},
+                attrs={"in_dtype": out_var.dtype,
+                       "out_dtype": var.dtype})
+
         var.op = op
         return op
...
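A hedged sketch of what the change enables: an FP16 parameter initialized with Uniform now gets its uniform_random op emitted on a temporary FP32 variable, followed by a cast back to FP16. The little network is illustrative only.

import paddle.fluid as fluid

x = fluid.layers.data(name='x', shape=[16], dtype='float16')
y = fluid.layers.fc(
    input=x,
    size=32,
    param_attr=fluid.ParamAttr(
        name='fc_w',
        initializer=fluid.initializer.Uniform(low=-0.1, high=0.1)))
# Before this change the uniform_random op would be asked to emit FP16
# directly; now it writes FP32 and a cast op produces the FP16 parameter.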
python/paddle/fluid/layers/nn.py
...
@@ -2801,6 +2801,10 @@ def batch_norm(input,
     helper = LayerHelper('batch_norm', **locals())
     dtype = helper.input_dtype()
 
+    # use fp32 for bn parameter
+    if dtype == core.VarDesc.VarType.FP16:
+        dtype = core.VarDesc.VarType.FP32
+
     input_shape = input.shape
     if data_layout == 'NCHW':
         channel_num = input_shape[1]
...
@@ -2835,7 +2839,7 @@ def batch_norm(input,
             trainable=False,
             do_model_average=do_model_average_for_mean_and_var),
         shape=param_shape,
-        dtype=input.dtype)
+        dtype=dtype)
     mean.stop_gradient = True
     variance = helper.create_parameter(
...
@@ -2845,7 +2849,7 @@ def batch_norm(input,
             trainable=False,
             do_model_average=do_model_average_for_mean_and_var),
         shape=param_shape,
-        dtype=input.dtype)
+        dtype=dtype)
     variance.stop_gradient = True
 
     # create output
...
@@ -4526,7 +4530,7 @@ def topk(input, k, name=None):
     Args:
         input(Variable): The input variable which can be a vector or Tensor with
             higher rank.
-        k(int): The number of top elements to look for along the last dimension
+        k(int | Variable): The number of top elements to look for along the last dimension
             of input.
         name(str|None): A name for this layer(optional). If set None, the layer
             will be named automatically.
...
@@ -4549,12 +4553,18 @@ def topk(input, k, name=None):
     helper = LayerHelper("top_k", **locals())
     values = helper.create_variable_for_type_inference(dtype=input.dtype)
     indices = helper.create_variable_for_type_inference(dtype="int64")
+    inputs = {"X": [input]}
+    attrs = None
+    if isinstance(k, Variable):
+        inputs['K'] = k
+    else:
+        attrs = {'k': k}
     helper.append_op(
         type="top_k",
-        inputs={"X": [input]},
+        inputs=inputs,
         outputs={"Out": [values],
                  "Indices": [indices]},
-        attrs={"k": k})
+        attrs=attrs)
     values.stop_gradient = True
     indices.stop_gradient = True
     return values, indices
...
@@ -7943,7 +7953,7 @@ def unstack(x, axis=0, num=None):
         num = x.shape[axis]
     outs = []
-    for _ in num:
+    for _ in range(num):
         outs.append(helper.create_variable_for_type_inference(x.dtype))
     helper.append_op(
...
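A hedged usage sketch of the new topk signature: k may now be a runtime tensor rather than a Python int. Names and shapes here are illustrative.

import paddle.fluid as fluid

scores = fluid.layers.data(name='scores', shape=[100], dtype='float32')
# k produced at runtime; with this change it is fed through the 'K' input
# of the top_k op instead of the compile-time 'k' attribute.
k = fluid.layers.fill_constant(shape=[1], dtype='int32', value=5)
values, indices = fluid.layers.topk(scores, k=k)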
python/paddle/fluid/tests/unittests/ngraph/test_fill_constant_ngraph_op.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import unittest
from paddle.fluid.tests.unittests.test_fill_constant_op import (
    TestFillConstantOp1, TestFillConstantOp2, TestFillConstantOpWithSelectedRows)


class TestNGRAPHFillConstantOp1(TestFillConstantOp1):
    def setUp(self):
        super(TestNGRAPHFillConstantOp1, self).setUp()


class TestNGRAPHFillConstantOp2(TestFillConstantOp2):
    def setUp(self):
        super(TestNGRAPHFillConstantOp2, self).setUp()


class TestNGRAPHFillConstantOpWithSelectedRows(TestFillConstantOpWithSelectedRows):
    def setUp(self):
        super(TestNGRAPHFillConstantOpWithSelectedRows, self).setUp()


if __name__ == "__main__":
    unittest.main()
python/paddle/fluid/tests/unittests/ngraph/test_top_k_ngraph_op.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import unittest
from paddle.fluid.tests.unittests.test_top_k_op import (
    TestTopkOp, TestTopkOp3d, TestTopkOp2, TestTopkOp3, TestTopkOp4)


class TestNGRAPHTopkOp(TestTopkOp):
    def setUp(self):
        super(TestNGRAPHTopkOp, self).setUp()


class TestNGRAPHTopkOp2(TestTopkOp2):
    def setUp(self):
        super(TestNGRAPHTopkOp2, self).setUp()


class TestNGRAPHTopkOp3(TestTopkOp3):
    def setUp(self):
        super(TestNGRAPHTopkOp3, self).setUp()


class TestNGRAPHTopkOp4(TestTopkOp4):
    def setUp(self):
        super(TestNGRAPHTopkOp4, self).setUp()


if __name__ == "__main__":
    unittest.main()
python/paddle/fluid/tests/unittests/op_test.py
...
@@ -368,6 +368,8 @@ class OpTest(unittest.TestCase):
                 place = core.CUDAPlace(0)
                 if core.is_float16_supported(place):
                     return [place]
+                else:
+                    return []
             else:
                 return []
         places = [fluid.CPUPlace()]
...
python/paddle/fluid/tests/unittests/test_accuracy_op.py
...
@@ -22,8 +22,10 @@ from op_test import OpTest
 class TestAccuracyOp(OpTest):
     def setUp(self):
         self.op_type = "accuracy"
+        self.dtype = np.float32
+        self.init_dtype()
         n = 8192
-        infer = np.random.random((n, 1)).astype("float32")
+        infer = np.random.random((n, 1)).astype(self.dtype)
         indices = np.random.randint(0, 2, (n, 1))
         label = np.random.randint(0, 2, (n, 1))
         self.inputs = {'Out': infer, 'Indices': indices, "Label": label}
...
@@ -34,14 +36,25 @@ class TestAccuracyOp(OpTest):
                     num_correct += 1
                     break
         self.outputs = {
-            'Accuracy': np.array([num_correct / float(n)]).astype("float32"),
+            'Accuracy': np.array([num_correct / float(n)]).astype(self.dtype),
             'Correct': np.array([num_correct]).astype("int32"),
             'Total': np.array([n]).astype("int32")
         }
 
+    def init_dtype(self):
+        pass
+
     def test_check_output(self):
         self.check_output()
 
 
+class TestAccuracyOpFp16(TestAccuracyOp):
+    def init_dtype(self):
+        self.dtype = np.float16
+
+    def test_check_output(self):
+        self.check_output(atol=1e-3)
+
+
 if __name__ == '__main__':
     unittest.main()
python/paddle/fluid/tests/unittests/test_dequantize_mkldnn_op.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import unittest
import numpy as np
from op_test import OpTest


class TestDeQuantizeOp(OpTest):
    def setUp(self):
        self.op_type = 'dequantize'
        self.scale = 2.0
        self.input_size = [1, 1, 5, 5]  # Naive nChw16c
        self.data_type = 'int8'
        self.set_scale()
        self.set_data_type()

        if self.data_type == 'int8':
            input = (np.random.randint(0, 100, self.input_size) - 50
                     ).astype(self.data_type)
            output = (input * (1 / self.scale)).astype('float')
        else:
            input = (np.random.randint(0, 100, self.input_size)).astype(
                self.data_type)
            output = (input * (1 / self.scale)).astype('float')

        self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(input)}
        self.outputs = {'Output': output}
        self.attrs = {'Scale': self.scale, }

    def test_check_output(self):
        self.check_output()

    def set_scale(self):
        pass

    def set_data_type(self):
        pass


class TestDeQuantizeOp1(TestDeQuantizeOp):
    def set_scale(self):
        self.scale = 1.5

    def set_data_type(self):
        self.data_type = 'int8'


class TestDeQuantizeOp2(TestDeQuantizeOp):
    def set_scale(self):
        self.scale = 0.8

    def set_data_type(self):
        self.data_type = 'uint8'


if __name__ == '__main__':
    unittest.main()
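A hedged NumPy check of the mapping the test asserts, namely dequantized value = quantized value * (1 / Scale); the sample values are made up.

import numpy as np

scale = 2.0
q = np.array([-50, 0, 99], dtype=np.int8)   # int8 quantized values
x = q.astype(np.float32) * (1.0 / scale)    # what the dequantize op outputs
print(x)                                    # -> [-25.   0.   49.5]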
python/paddle/fluid/tests/unittests/test_elementwise_div_op.py
...
@@ -21,14 +21,16 @@ from op_test import OpTest
 class ElementwiseDivOp(OpTest):
     def setUp(self):
         self.op_type = "elementwise_div"
+        self.dtype = np.float32
+        self.init_dtype()
         """ Warning
         CPU gradient check error!
         'X': np.random.random((32,84)).astype("float32"),
         'Y': np.random.random((32,84)).astype("float32")
         """
         self.inputs = {
-            'X': np.random.uniform(0.1, 1, [13, 17]).astype("float32"),
-            'Y': np.random.uniform(0.1, 1, [13, 17]).astype("float32")
+            'X': np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype),
+            'Y': np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
         }
         self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])}
...
@@ -46,6 +48,9 @@ class ElementwiseDivOp(OpTest):
         self.check_grad(
             ['X'], 'Out', max_relative_error=0.05, no_grad_set=set('Y'))
 
+    def init_dtype(self):
+        pass
+
 
 class TestElementwiseDivOp_scalar(ElementwiseDivOp):
     def setUp(self):
...
@@ -126,5 +131,21 @@ class TestElementwiseDivOp_broadcast_3(ElementwiseDivOp):
         }
 
 
+class TestElementwiseDivOpFp16(ElementwiseDivOp):
+    def init_dtype(self):
+        self.dtype = np.float16
+
+    def test_check_grad_normal(self):
+        self.check_grad(['X', 'Y'], 'Out', max_relative_error=1)
+
+    def test_check_grad_ingore_x(self):
+        self.check_grad(
+            ['Y'], 'Out', max_relative_error=1, no_grad_set=set("X"))
+
+    def test_check_grad_ingore_y(self):
+        self.check_grad(
+            ['X'], 'Out', max_relative_error=1, no_grad_set=set('Y'))
+
+
 if __name__ == '__main__':
     unittest.main()
python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py
...
@@ -135,5 +135,10 @@ class TestElementwiseMulOp_broadcast_3(ElementwiseMulOp):
         }
 
 
+class TestElementwiseMulOpFp16(ElementwiseMulOp):
+    def init_dtype(self):
+        self.dtype = np.float16
+
+
 if __name__ == '__main__':
     unittest.main()
python/paddle/fluid/tests/unittests/test_fill_zeros_like_op.py
...
@@ -22,12 +22,22 @@ from op_test import OpTest
 class TestFillZerosLikeOp(OpTest):
     def setUp(self):
         self.op_type = "fill_zeros_like"
-        self.inputs = {'X': np.random.random((219, 232)).astype("float32")}
+        self.dtype = np.float32
+        self.init_dtype()
+        self.inputs = {'X': np.random.random((219, 232)).astype(self.dtype)}
         self.outputs = {'Out': np.zeros_like(self.inputs["X"])}
 
+    def init_dtype(self):
+        pass
+
     def test_check_output(self):
         self.check_output()
 
 
+class TestFillZerosLikeOpFp16(TestFillZerosLikeOp):
+    def init_dtype(self):
+        self.dtype = np.float16
+
+
 if __name__ == "__main__":
     unittest.main()
python/paddle/fluid/tests/unittests/test_momentum_op.py
...
@@ -24,11 +24,13 @@ from op_test import OpTest
 class TestMomentumOp1(OpTest):
     def setUp(self):
         self.op_type = "momentum"
+        self.dtype = np.float32
+        self.init_dtype()
 
-        param = np.random.random((123, 321)).astype("float32")
-        grad = np.random.random((123, 321)).astype("float32")
-        velocity = np.zeros((123, 321)).astype("float32")
-        learning_rate = np.array([0.001]).astype("float32")
+        param = np.random.random((123, 321)).astype(self.dtype)
+        grad = np.random.random((123, 321)).astype(self.dtype)
+        velocity = np.zeros((123, 321)).astype(self.dtype)
+        learning_rate = np.array([0.001]).astype(self.dtype)
         mu = 0.0001
         use_nesterov = False
...
@@ -50,10 +52,21 @@ class TestMomentumOp1(OpTest):
         self.outputs = {'ParamOut': param_out, 'VelocityOut': velocity_out}
 
+    def init_dtype(self):
+        pass
+
     def test_check_output(self):
         self.check_output()
 
 
+class TestMomentumOpFp16(TestMomentumOp1):
+    def init_dtype(self):
+        self.dtype = np.float16
+
+    def test_check_output(self):
+        self.check_output(atol=1e-3)
+
+
 class TestMomentumOp2(OpTest):
     '''Test Momentum with default values for attributes
     '''
...
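For reference, a hedged sketch of the update rule the momentum tests compare against (their own reference computation sits in the elided part of the file); it mirrors the standard momentum and Nesterov updates and is not copied from the diff.

import numpy as np

def momentum_ref(param, grad, velocity, learning_rate, mu, use_nesterov):
    # Standard momentum accumulation.
    velocity_out = mu * velocity + grad
    if use_nesterov:
        param_out = param - (grad + velocity_out * mu) * learning_rate
    else:
        param_out = param - learning_rate * velocity_out
    return param_out, velocity_out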
python/paddle/fluid/tests/unittests/test_operator_desc.py
(diff collapsed)
python/paddle/fluid/tests/unittests/test_py_func_op.py
(diff collapsed)
python/paddle/fluid/tests/unittests/test_quantize_mkldnn_op.py
0 → 100644
(diff collapsed)
python/paddle/fluid/tests/unittests/test_top_k_op.py
(diff collapsed)
python/paddle/fluid/transpiler/inference_transpiler.py
(diff collapsed)
python/requirements.txt
(diff collapsed)
python/setup.py.in
(diff collapsed)