Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
883ee1a3
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
883ee1a3
编写于
12月 10, 2021
作者:
W
wanghuancoder
提交者:
GitHub
12月 10, 2021
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' into revert-37926-eager_coreops_500
上级
823208fe
11c785a4
变更
141
展开全部
隐藏空白更改
内联
并排
Showing
141 changed file
with
7103 addition
and
930 deletion
+7103
-930
paddle/fluid/distributed/fleet_executor/carrier.h
paddle/fluid/distributed/fleet_executor/carrier.h
+5
-0
paddle/fluid/distributed/fleet_executor/compute_interceptor.cc
...e/fluid/distributed/fleet_executor/compute_interceptor.cc
+3
-0
paddle/fluid/eager/accumulation/gradient_accumulation.cc
paddle/fluid/eager/accumulation/gradient_accumulation.cc
+16
-0
paddle/fluid/framework/dlpack_tensor.cc
paddle/fluid/framework/dlpack_tensor.cc
+5
-0
paddle/fluid/framework/executor.cc
paddle/fluid/framework/executor.cc
+8
-0
paddle/fluid/framework/ir/CMakeLists.txt
paddle/fluid/framework/ir/CMakeLists.txt
+1
-1
paddle/fluid/framework/ir/fc_elementwise_layernorm_fuse_pass.cc
.../fluid/framework/ir/fc_elementwise_layernorm_fuse_pass.cc
+7
-0
paddle/fluid/framework/ir/ipu/avg_shard_pass.cc
paddle/fluid/framework/ir/ipu/avg_shard_pass.cc
+56
-0
paddle/fluid/framework/ir/ipu/avg_shard_pass.h
paddle/fluid/framework/ir/ipu/avg_shard_pass.h
+30
-0
paddle/fluid/framework/ir/ipu/forward_graph_extract_pass.cc
paddle/fluid/framework/ir/ipu/forward_graph_extract_pass.cc
+133
-0
paddle/fluid/framework/ir/ipu/forward_graph_extract_pass.h
paddle/fluid/framework/ir/ipu/forward_graph_extract_pass.h
+31
-0
paddle/fluid/framework/ir/ipu/infer_shape_pass.cc
paddle/fluid/framework/ir/ipu/infer_shape_pass.cc
+108
-0
paddle/fluid/framework/ir/ipu/infer_shape_pass.h
paddle/fluid/framework/ir/ipu/infer_shape_pass.h
+30
-0
paddle/fluid/framework/ir/ipu/inference_postprocess_pass.cc
paddle/fluid/framework/ir/ipu/inference_postprocess_pass.cc
+89
-0
paddle/fluid/framework/ir/ipu/inference_postprocess_pass.h
paddle/fluid/framework/ir/ipu/inference_postprocess_pass.h
+30
-0
paddle/fluid/framework/ir/ipu/inference_process_pass.cc
paddle/fluid/framework/ir/ipu/inference_process_pass.cc
+129
-0
paddle/fluid/framework/ir/ipu/inference_process_pass.h
paddle/fluid/framework/ir/ipu/inference_process_pass.h
+30
-0
paddle/fluid/framework/ir/ipu/ipu_graph_builder_pass.cc
paddle/fluid/framework/ir/ipu/ipu_graph_builder_pass.cc
+52
-0
paddle/fluid/framework/ir/ipu/ipu_graph_builder_pass.h
paddle/fluid/framework/ir/ipu/ipu_graph_builder_pass.h
+31
-0
paddle/fluid/framework/ir/ipu/ipu_inplace_pass.cc
paddle/fluid/framework/ir/ipu/ipu_inplace_pass.cc
+85
-0
paddle/fluid/framework/ir/ipu/ipu_inplace_pass.h
paddle/fluid/framework/ir/ipu/ipu_inplace_pass.h
+30
-0
paddle/fluid/framework/ir/ipu/ipu_pass_base.cc
paddle/fluid/framework/ir/ipu/ipu_pass_base.cc
+28
-0
paddle/fluid/framework/ir/ipu/ipu_pass_base.h
paddle/fluid/framework/ir/ipu/ipu_pass_base.h
+37
-0
paddle/fluid/framework/ir/ipu/ipu_runtime_replacer_pass.cc
paddle/fluid/framework/ir/ipu/ipu_runtime_replacer_pass.cc
+97
-0
paddle/fluid/framework/ir/ipu/ipu_runtime_replacer_pass.h
paddle/fluid/framework/ir/ipu/ipu_runtime_replacer_pass.h
+31
-0
paddle/fluid/framework/ir/ipu/optimizer_extract_pass.cc
paddle/fluid/framework/ir/ipu/optimizer_extract_pass.cc
+91
-0
paddle/fluid/framework/ir/ipu/optimizer_extract_pass.h
paddle/fluid/framework/ir/ipu/optimizer_extract_pass.h
+31
-0
paddle/fluid/framework/ir/ipu/optimizer_state_align_pass.cc
paddle/fluid/framework/ir/ipu/optimizer_state_align_pass.cc
+79
-0
paddle/fluid/framework/ir/ipu/optimizer_state_align_pass.h
paddle/fluid/framework/ir/ipu/optimizer_state_align_pass.h
+36
-0
paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.cc
...le/fluid/framework/ir/ipu/popart_canonicalization_pass.cc
+68
-0
paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.h
paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.h
+30
-0
paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc
.../fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc
+31
-1
paddle/fluid/framework/op_registry.h
paddle/fluid/framework/op_registry.h
+3
-0
paddle/fluid/framework/paddle2cinn/CMakeLists.txt
paddle/fluid/framework/paddle2cinn/CMakeLists.txt
+1
-1
paddle/fluid/framework/paddle2cinn/cinn_cache_key.cc
paddle/fluid/framework/paddle2cinn/cinn_cache_key.cc
+51
-37
paddle/fluid/framework/paddle2cinn/cinn_cache_key.h
paddle/fluid/framework/paddle2cinn/cinn_cache_key.h
+36
-6
paddle/fluid/framework/paddle2cinn/cinn_cache_key_test.cc
paddle/fluid/framework/paddle2cinn/cinn_cache_key_test.cc
+112
-10
paddle/fluid/framework/paddle2cinn/cinn_compiler.cc
paddle/fluid/framework/paddle2cinn/cinn_compiler.cc
+29
-6
paddle/fluid/framework/paddle2cinn/cinn_compiler.h
paddle/fluid/framework/paddle2cinn/cinn_compiler.h
+13
-2
paddle/fluid/framework/pten_utils.cc
paddle/fluid/framework/pten_utils.cc
+4
-2
paddle/fluid/framework/tensor_util.cc
paddle/fluid/framework/tensor_util.cc
+46
-5
paddle/fluid/imperative/gradient_accumulator.cc
paddle/fluid/imperative/gradient_accumulator.cc
+7
-0
paddle/fluid/imperative/prepared_operator.cc
paddle/fluid/imperative/prepared_operator.cc
+8
-0
paddle/fluid/imperative/reducer.cc
paddle/fluid/imperative/reducer.cc
+0
-64
paddle/fluid/memory/allocation/allocator_facade.cc
paddle/fluid/memory/allocation/allocator_facade.cc
+3
-2
paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
+28
-0
paddle/fluid/operators/cinn/cinn_launch_context.cc
paddle/fluid/operators/cinn/cinn_launch_context.cc
+97
-70
paddle/fluid/operators/cinn/cinn_launch_context.h
paddle/fluid/operators/cinn/cinn_launch_context.h
+19
-18
paddle/fluid/operators/cinn/cinn_launch_context_test.cc
paddle/fluid/operators/cinn/cinn_launch_context_test.cc
+38
-33
paddle/fluid/operators/cinn/cinn_launch_op.cu.cc
paddle/fluid/operators/cinn/cinn_launch_op.cu.cc
+0
-20
paddle/fluid/operators/cinn/cinn_launch_op.h
paddle/fluid/operators/cinn/cinn_launch_op.h
+44
-65
paddle/fluid/operators/cinn/cinn_launch_op_test.cc
paddle/fluid/operators/cinn/cinn_launch_op_test.cc
+3
-2
paddle/fluid/operators/complex_view_op.cc
paddle/fluid/operators/complex_view_op.cc
+163
-0
paddle/fluid/operators/complex_view_op.cu
paddle/fluid/operators/complex_view_op.cu
+29
-0
paddle/fluid/operators/complex_view_op.h
paddle/fluid/operators/complex_view_op.h
+60
-0
paddle/fluid/operators/ipu_runtime_op.cc
paddle/fluid/operators/ipu_runtime_op.cc
+62
-0
paddle/fluid/operators/ipu_runtime_op.h
paddle/fluid/operators/ipu_runtime_op.h
+69
-0
paddle/fluid/operators/math/math_function.cc
paddle/fluid/operators/math/math_function.cc
+7
-0
paddle/fluid/operators/py_layer_op.h
paddle/fluid/operators/py_layer_op.h
+1
-1
paddle/fluid/operators/reshape_op.cc
paddle/fluid/operators/reshape_op.cc
+2
-2
paddle/fluid/platform/device/gpu/cuda/cuda_graph.cc
paddle/fluid/platform/device/gpu/cuda/cuda_graph.cc
+14
-0
paddle/fluid/platform/device/gpu/cuda/cuda_graph.h
paddle/fluid/platform/device/gpu/cuda/cuda_graph.h
+22
-0
paddle/fluid/platform/device/gpu/gpu_primitives.h
paddle/fluid/platform/device/gpu/gpu_primitives.h
+15
-0
paddle/fluid/platform/device/ipu/CMakeLists.txt
paddle/fluid/platform/device/ipu/CMakeLists.txt
+1
-1
paddle/fluid/platform/device/ipu/popart_canonicalization/activation_ops.cc
...form/device/ipu/popart_canonicalization/activation_ops.cc
+72
-0
paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.cc
...ice/ipu/popart_canonicalization/canonicalization_utils.cc
+185
-0
paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h
...vice/ipu/popart_canonicalization/canonicalization_utils.h
+64
-0
paddle/fluid/platform/device/ipu/popart_canonicalization/elementwise_ops.cc
...orm/device/ipu/popart_canonicalization/elementwise_ops.cc
+108
-0
paddle/fluid/platform/device/ipu/popart_canonicalization/logic_ops.cc
.../platform/device/ipu/popart_canonicalization/logic_ops.cc
+36
-0
paddle/fluid/platform/device/ipu/popart_canonicalization/math_ops.cc
...d/platform/device/ipu/popart_canonicalization/math_ops.cc
+259
-0
paddle/fluid/platform/device/ipu/popart_canonicalization/nn_ops.cc
...uid/platform/device/ipu/popart_canonicalization/nn_ops.cc
+301
-0
paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.cc
...platform/device/ipu/popart_canonicalization/op_builder.cc
+195
-0
paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.h
.../platform/device/ipu/popart_canonicalization/op_builder.h
+85
-0
paddle/fluid/platform/device/ipu/supported_ops_autogen.h
paddle/fluid/platform/device/ipu/supported_ops_autogen.h
+2
-0
paddle/fluid/platform/device_context.cc
paddle/fluid/platform/device_context.cc
+30
-2
paddle/fluid/platform/place.cc
paddle/fluid/platform/place.cc
+7
-0
paddle/fluid/platform/place.h
paddle/fluid/platform/place.h
+40
-2
paddle/fluid/pybind/.gitignore
paddle/fluid/pybind/.gitignore
+2
-1
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+200
-1
paddle/fluid/pybind/reader_py.cc
paddle/fluid/pybind/reader_py.cc
+3
-0
paddle/fluid/pybind/tensor_py.h
paddle/fluid/pybind/tensor_py.h
+15
-0
paddle/pten/api/lib/CMakeLists.txt
paddle/pten/api/lib/CMakeLists.txt
+4
-0
paddle/pten/api/lib/kernel_declare.h
paddle/pten/api/lib/kernel_declare.h
+37
-0
paddle/pten/api/lib/utils.cc
paddle/pten/api/lib/utils.cc
+6
-2
paddle/pten/core/kernel_alias_name.h
paddle/pten/core/kernel_alias_name.h
+2
-2
paddle/pten/core/kernel_factory.h
paddle/pten/core/kernel_factory.h
+1
-8
paddle/pten/core/kernel_registry.h
paddle/pten/core/kernel_registry.h
+147
-220
paddle/pten/kernels/cpu/creation.cc
paddle/pten/kernels/cpu/creation.cc
+2
-4
paddle/pten/kernels/cpu/linalg.cc
paddle/pten/kernels/cpu/linalg.cc
+2
-5
paddle/pten/kernels/cpu/manipulation.cc
paddle/pten/kernels/cpu/manipulation.cc
+22
-34
paddle/pten/kernels/cpu/math.cc
paddle/pten/kernels/cpu/math.cc
+8
-14
paddle/pten/kernels/cpu/utils.cc
paddle/pten/kernels/cpu/utils.cc
+1
-4
paddle/pten/kernels/cuda/creation.cu
paddle/pten/kernels/cuda/creation.cu
+2
-4
paddle/pten/kernels/cuda/linalg.cu
paddle/pten/kernels/cuda/linalg.cu
+2
-4
paddle/pten/kernels/cuda/manipulation.cu
paddle/pten/kernels/cuda/manipulation.cu
+22
-38
paddle/pten/kernels/cuda/math.cu
paddle/pten/kernels/cuda/math.cu
+8
-11
paddle/pten/kernels/cuda/utils.cu
paddle/pten/kernels/cuda/utils.cu
+1
-4
paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h
paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h
+18
-1
paddle/pten/kernels/xpu/manipulation.cc
paddle/pten/kernels/xpu/manipulation.cc
+3
-13
paddle/pten/kernels/xpu/utils.cc
paddle/pten/kernels/xpu/utils.cc
+1
-4
paddle/pten/tests/api/test_reshape_api.cc
paddle/pten/tests/api/test_reshape_api.cc
+0
-6
python/paddle/__init__.py
python/paddle/__init__.py
+11
-0
python/paddle/device/__init__.py
python/paddle/device/__init__.py
+46
-3
python/paddle/distributed/auto_parallel/operators/dist_matmul.py
...paddle/distributed/auto_parallel/operators/dist_matmul.py
+442
-0
python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/sharding_optimizer_stage2.py
...optimizers/dygraph_optimizer/sharding_optimizer_stage2.py
+65
-17
python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage2.py
...stributed/fleet/meta_parallel/sharding/sharding_stage2.py
+54
-14
python/paddle/distributed/fleet/meta_parallel/sharding/sharding_utils.py
...istributed/fleet/meta_parallel/sharding/sharding_utils.py
+151
-0
python/paddle/distributed/fleet/utils/internal_storage.py
python/paddle/distributed/fleet/utils/internal_storage.py
+84
-9
python/paddle/distribution.py
python/paddle/distribution.py
+2
-1
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+7
-1
python/paddle/fluid/contrib/slim/quantization/imperative/qat.py
.../paddle/fluid/contrib/slim/quantization/imperative/qat.py
+8
-3
python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py
...d/contrib/slim/quantization/post_training_quantization.py
+72
-41
python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
+2
-0
python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_while.py
...ntrib/slim/tests/test_post_training_quantization_while.py
+313
-0
python/paddle/fluid/dataloader/dataloader_iter.py
python/paddle/fluid/dataloader/dataloader_iter.py
+4
-0
python/paddle/fluid/dygraph/dygraph_to_static/utils.py
python/paddle/fluid/dygraph/dygraph_to_static/utils.py
+5
-1
python/paddle/fluid/dygraph/varbase_patch_methods.py
python/paddle/fluid/dygraph/varbase_patch_methods.py
+1
-1
python/paddle/fluid/executor.py
python/paddle/fluid/executor.py
+20
-0
python/paddle/fluid/reader.py
python/paddle/fluid/reader.py
+4
-1
python/paddle/fluid/tests/unittests/dygraph_sharding_stage2.py
...n/paddle/fluid/tests/unittests/dygraph_sharding_stage2.py
+10
-9
python/paddle/fluid/tests/unittests/dygraph_sharding_stage2_offload.py
.../fluid/tests/unittests/dygraph_sharding_stage2_offload.py
+115
-0
python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py
...ts/unittests/dygraph_to_static/test_program_translator.py
+14
-0
python/paddle/fluid/tests/unittests/ir/inference/auto_scan_test.py
...ddle/fluid/tests/unittests/ir/inference/auto_scan_test.py
+21
-13
python/paddle/fluid/tests/unittests/ir/inference/test_adaptive_pool2d_convert_global_pass_autoscan.py
...ence/test_adaptive_pool2d_convert_global_pass_autoscan.py
+1
-1
python/paddle/fluid/tests/unittests/ir/inference/test_fc_elementwise_layernorm_fuse_pass.py
...s/ir/inference/test_fc_elementwise_layernorm_fuse_pass.py
+135
-0
python/paddle/fluid/tests/unittests/ir/inference/test_transpose_flatten_concat_fuse_pass.py
...s/ir/inference/test_transpose_flatten_concat_fuse_pass.py
+131
-56
python/paddle/fluid/tests/unittests/test_auto_search_dist_matmul_op.py
.../fluid/tests/unittests/test_auto_search_dist_matmul_op.py
+397
-0
python/paddle/fluid/tests/unittests/test_complex_view_op.py
python/paddle/fluid/tests/unittests/test_complex_view_op.py
+127
-0
python/paddle/fluid/tests/unittests/test_cuda_graph.py
python/paddle/fluid/tests/unittests/test_cuda_graph.py
+44
-1
python/paddle/fluid/tests/unittests/test_distribution.py
python/paddle/fluid/tests/unittests/test_distribution.py
+23
-0
python/paddle/fluid/tests/unittests/test_dygraph_sharding_stage2.py
...dle/fluid/tests/unittests/test_dygraph_sharding_stage2.py
+3
-0
python/paddle/fluid/tests/unittests/test_fleet_executor.py
python/paddle/fluid/tests/unittests/test_fleet_executor.py
+12
-0
python/paddle/fluid/tests/unittests/test_gcd.py
python/paddle/fluid/tests/unittests/test_gcd.py
+93
-0
python/paddle/fluid/tests/unittests/test_lcm.py
python/paddle/fluid/tests/unittests/test_lcm.py
+93
-0
python/paddle/framework/__init__.py
python/paddle/framework/__init__.py
+1
-0
python/paddle/tensor/__init__.py
python/paddle/tensor/__init__.py
+11
-0
python/paddle/tensor/manipulation.py
python/paddle/tensor/manipulation.py
+92
-0
python/paddle/tensor/math.py
python/paddle/tensor/math.py
+137
-5
python/paddle/utils/code_gen/api.yaml
python/paddle/utils/code_gen/api.yaml
+1
-1
python/paddle/utils/code_gen/api_gen.py
python/paddle/utils/code_gen/api_gen.py
+1
-17
tools/parallel_UT_rule.py
tools/parallel_UT_rule.py
+2
-2
未找到文件。
paddle/fluid/distributed/fleet_executor/carrier.h
浏览文件 @
883ee1a3
...
...
@@ -75,6 +75,11 @@ class Carrier final {
bool
IsInit
()
const
;
// NOTE: This mutex will be used in interceptor's RunOps function.
// This mutex is used for avoiding forward ops and backward ops run
// simultaneously, which will lead to a random hang for some sync ops.
std
::
mutex
run
;
DISABLE_COPY_AND_ASSIGN
(
Carrier
);
private:
...
...
paddle/fluid/distributed/fleet_executor/compute_interceptor.cc
浏览文件 @
883ee1a3
...
...
@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/distributed/fleet_executor/compute_interceptor.h"
#include "paddle/fluid/distributed/fleet_executor/carrier.h"
#include "paddle/fluid/distributed/fleet_executor/task_node.h"
#include "paddle/fluid/framework/executor_gc_helper.h"
...
...
@@ -169,6 +170,8 @@ void ComputeInterceptor::ReplyCompletedToUpStream() {
}
void
ComputeInterceptor
::
RunOps
()
{
Carrier
&
carrier_instance
=
Carrier
::
Instance
();
std
::
unique_lock
<
std
::
mutex
>
lock
(
carrier_instance
.
run
);
VLOG
(
3
)
<<
"ComputeInterceptor "
<<
interceptor_id_
<<
" running ops for the "
<<
step_
+
1
<<
" time."
;
for
(
auto
op
:
node_
->
ops
())
{
...
...
paddle/fluid/eager/accumulation/gradient_accumulation.cc
浏览文件 @
883ee1a3
...
...
@@ -116,6 +116,22 @@ class TensorAddFunctor : public boost::static_visitor<> {
}
#endif
#ifdef PADDLE_WITH_IPU
void
operator
()(
const
paddle
::
platform
::
IPUPlace
&
place
)
{
PADDLE_THROW
(
paddle
::
platform
::
errors
::
PermissionDenied
(
"Gradient accumulation on place (%s) "
"is not supported in imperative mode"
,
place
));
}
#else
void
operator
()(
const
paddle
::
platform
::
IPUPlace
&
place
)
{
PADDLE_THROW
(
paddle
::
platform
::
errors
::
PermissionDenied
(
"Gradient accumulation on place (%s) "
"is not supported in imperative mode"
,
place
));
}
#endif
void
operator
()(
const
paddle
::
platform
::
NPUPinnedPlace
&
place
)
{
PADDLE_THROW
(
paddle
::
platform
::
errors
::
PermissionDenied
(
"Gradient accumulation on place (%s) "
...
...
paddle/fluid/framework/dlpack_tensor.cc
浏览文件 @
883ee1a3
...
...
@@ -81,6 +81,11 @@ struct DLDeviceVisitor : public boost::static_visitor<::DLDevice> {
return
device
;
}
inline
::
DLDevice
operator
()(
const
platform
::
IPUPlace
&
place
)
const
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"platform::IPUPlace is not supported"
));
}
inline
::
DLDevice
operator
()(
const
platform
::
XPUPlace
&
place
)
const
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"platform::XPUPlace is not supported"
));
...
...
paddle/fluid/framework/executor.cc
浏览文件 @
883ee1a3
...
...
@@ -463,6 +463,14 @@ void Executor::RunPartialPreparedContext(ExecutorPrepareContext* ctx,
#else
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"No XPU gc found in CPU/GPU paddle"
));
#endif
}
else
if
(
platform
::
is_ipu_place
(
place_
))
{
#ifdef PADDLE_WITH_IPU
gc
.
reset
(
new
IPUGarbageCollector
(
BOOST_GET_CONST
(
platform
::
IPUPlace
,
place_
),
max_memory_size
));
#else
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"No IPU gc found in CPU/IPU paddle"
));
#endif
}
else
if
(
platform
::
is_npu_place
(
place_
))
{
#ifdef PADDLE_WITH_ASCEND_CL
...
...
paddle/fluid/framework/ir/CMakeLists.txt
浏览文件 @
883ee1a3
...
...
@@ -156,7 +156,7 @@ cc_test(test_seqpool_cvm_concat_fuse_pass SRCS seqpool_cvm_concat_fuse_pass_test
cc_test
(
test_repeated_fc_relu_fuse_pass_cc SRCS repeated_fc_relu_fuse_pass_tester.cc DEPS repeated_fc_relu_fuse_pass framework_proto
)
cc_test
(
test_is_test_pass SRCS is_test_pass_tester.cc DEPS is_test_pass
)
cc_test
(
test_simplify_with_basic_ops_pass SRCS simplify_with_basic_ops_pass_tester.cc DEPS simplify_with_basic_ops_pass
)
cc_test
(
test_fc_elementwise_layernorm_fuse_pass SRCS fc_elementwise_layernorm_fuse_pass_tester.cc DEPS fc_elementwise_layernorm_fuse_pass
)
cc_test
(
test_fc_elementwise_layernorm_fuse_pass
_cc
SRCS fc_elementwise_layernorm_fuse_pass_tester.cc DEPS fc_elementwise_layernorm_fuse_pass
)
cc_test
(
test_skip_layernorm_fuse_pass SRCS skip_layernorm_fuse_pass_tester.cc DEPS skip_layernorm_fuse_pass
)
cc_test
(
test_multihead_matmul_fuse_pass SRCS multihead_matmul_fuse_pass_tester.cc DEPS multihead_matmul_fuse_pass
)
cc_test
(
test_conv_bn_fuse_pass_cc SRCS conv_bn_fuse_pass_tester.cc DEPS conv_bn_fuse_pass
)
...
...
paddle/fluid/framework/ir/fc_elementwise_layernorm_fuse_pass.cc
浏览文件 @
883ee1a3
...
...
@@ -17,6 +17,7 @@ limitations under the License. */
#include <string>
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/op_version_registry.h"
namespace
paddle
{
namespace
framework
{
...
...
@@ -338,3 +339,9 @@ void FCElementwiseLayerNormFusePass::ApplyImpl(ir::Graph *graph) const {
REGISTER_PASS
(
fc_elementwise_layernorm_fuse_pass
,
paddle
::
framework
::
ir
::
FCElementwiseLayerNormFusePass
);
REGISTER_PASS_CAPABILITY
(
fc_elementwise_layernorm_fuse_pass
)
.
AddCombination
(
paddle
::
framework
::
compatible
::
OpVersionComparatorCombination
()
.
EQ
(
"fc"
,
0
)
.
LE
(
"elementwise_add"
,
1
)
.
EQ
(
"layer_norm"
,
0
));
paddle/fluid/framework/ir/ipu/avg_shard_pass.cc
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/ipu/avg_shard_pass.h"
#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
void
AvgShardPass
::
ApplyImpl
(
ir
::
Graph
*
graph
)
const
{
VLOG
(
10
)
<<
"enter AvgShardPass::ApplyImpl"
;
std
::
shared_ptr
<
platform
::
ipu
::
IpuBackend
>
ipu_backend
=
platform
::
ipu
::
IpuBackend
::
GetInstance
();
if
(
ipu_backend
->
GetIpuStrategy
()
->
need_avg_shard
)
{
VLOG
(
10
)
<<
"start AvgShardPass"
;
auto
nodes
=
ir
::
TopologySortOperations
(
*
graph
);
auto
num_ipus
=
ipu_backend
->
GetIpuStrategy
()
->
num_ipus
;
int
shard_position
=
nodes
.
size
()
/
num_ipus
;
int
index_and_stage
=
-
1
;
for
(
int
i
=
0
;
i
<
nodes
.
size
();
i
++
)
{
if
((
i
%
shard_position
)
==
0
&&
index_and_stage
<
num_ipus
-
1
)
{
index_and_stage
++
;
}
nodes
[
i
]
->
Op
()
->
SetAttr
(
"ipu_index"
,
index_and_stage
);
nodes
[
i
]
->
Op
()
->
SetAttr
(
"ipu_stage"
,
index_and_stage
);
}
VLOG
(
10
)
<<
"end AvgShardPass"
;
}
VLOG
(
10
)
<<
"leave AvgShardPass::ApplyImpl"
;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
REGISTER_PASS
(
avg_shard_pass
,
paddle
::
framework
::
ir
::
AvgShardPass
);
paddle/fluid/framework/ir/ipu/avg_shard_pass.h
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/ipu/ipu_pass_base.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
class
AvgShardPass
:
public
IPUPassBase
{
protected:
void
ApplyImpl
(
ir
::
Graph
*
graph
)
const
override
;
};
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/ipu/forward_graph_extract_pass.cc
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/ipu/forward_graph_extract_pass.h"
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
void
ForwardGraphExtractPass
::
ApplyImpl
(
ir
::
Graph
*
graph
)
const
{
VLOG
(
10
)
<<
"enter ForwardGraphExtractPass::ApplyImpl"
;
std
::
unordered_map
<
OpRole
,
std
::
unordered_set
<
ir
::
Node
*>>
all_ops
{
{
OpRole
::
kForward
,
{}},
{
OpRole
::
kBackward
,
{}},
{
OpRole
::
kOptimize
,
{}},
{
OpRole
::
kRPC
,
{}},
{
OpRole
::
kDist
,
{}},
{
OpRole
::
kLRSched
,
{}},
{
OpRole
::
kLoss
,
{}},
{
OpRole
::
kNotSpecified
,
{}}};
for
(
auto
*
node
:
graph
->
Nodes
())
{
if
(
!
node
->
IsOp
())
{
continue
;
}
auto
op_role
=
BOOST_GET_MUTABLE
(
int
,
node
->
Op
()
->
GetAttr
(
"op_role"
));
if
(
op_role
==
static_cast
<
int
>
(
OpRole
::
kForward
))
{
all_ops
[
OpRole
::
kForward
].
insert
(
node
);
}
else
if
(
op_role
==
static_cast
<
int
>
(
OpRole
::
kBackward
))
{
all_ops
[
OpRole
::
kBackward
].
insert
(
node
);
}
else
if
(
op_role
==
static_cast
<
int
>
(
OpRole
::
kOptimize
))
{
all_ops
[
OpRole
::
kOptimize
].
insert
(
node
);
}
else
if
(
op_role
==
static_cast
<
int
>
(
OpRole
::
kRPC
))
{
}
else
if
(
op_role
==
static_cast
<
int
>
(
OpRole
::
kDist
))
{
}
else
if
(
op_role
==
static_cast
<
int
>
(
OpRole
::
kLRSched
))
{
}
else
if
(
op_role
==
static_cast
<
int
>
(
OpRole
::
kLoss
))
{
all_ops
[
OpRole
::
kLoss
].
insert
(
node
);
}
else
if
(
op_role
==
static_cast
<
int
>
(
OpRole
::
kNotSpecified
))
{
LOG
(
WARNING
)
<<
"Op: "
<<
node
->
Name
()
<<
" OpRole is NotSpecified "
;
}
}
std
::
unordered_set
<
ir
::
Node
*>
forward_vars
;
std
::
unordered_set
<
ir
::
Node
*>
backward_vars
;
std
::
unordered_set
<
ir
::
Node
*>
control_vars
;
// forward_vars
for
(
auto
&
nodes
:
std
::
array
<
std
::
unordered_set
<
ir
::
Node
*>
,
2
>
{
all_ops
[
OpRole
::
kForward
],
all_ops
[
OpRole
::
kLoss
]})
{
for
(
auto
*
node
:
nodes
)
{
for
(
auto
*
in_node
:
node
->
inputs
)
{
forward_vars
.
insert
(
in_node
);
}
for
(
auto
*
out_node
:
node
->
outputs
)
{
forward_vars
.
insert
(
out_node
);
}
}
}
// control_vars & backward_vars
for
(
auto
*
node
:
graph
->
Nodes
())
{
if
(
!
node
->
IsVar
())
{
continue
;
}
if
(
node
->
IsCtrlVar
())
{
control_vars
.
insert
(
node
);
}
for
(
auto
*
in_node
:
node
->
inputs
)
{
if
(
all_ops
[
OpRole
::
kOptimize
].
count
(
in_node
))
{
backward_vars
.
insert
(
node
);
}
}
}
// all removed node
std
::
unordered_set
<
ir
::
Node
*>
rm_nodes
;
for
(
auto
*
node
:
graph
->
Nodes
())
{
if
(
backward_vars
.
count
(
node
))
{
rm_nodes
.
insert
(
node
);
}
else
if
(
control_vars
.
count
(
node
))
{
rm_nodes
.
insert
(
node
);
}
else
if
(
all_ops
[
OpRole
::
kBackward
].
count
(
node
))
{
rm_nodes
.
insert
(
node
);
}
else
if
(
all_ops
[
OpRole
::
kForward
].
count
(
node
)
==
0
&&
all_ops
[
OpRole
::
kLoss
].
count
(
node
)
==
0
&&
forward_vars
.
count
(
node
)
==
0
)
{
rm_nodes
.
insert
(
node
);
}
else
if
(
node
->
Name
()
==
"feed"
||
node
->
Name
()
==
"fetch"
)
{
rm_nodes
.
insert
(
node
);
}
}
VLOG
(
10
)
<<
"Remove Node: "
;
for
(
auto
*
node
:
rm_nodes
)
{
// rm node releations
for
(
auto
*
node_in
:
node
->
inputs
)
{
for
(
size_t
i
=
0
;
i
<
node_in
->
outputs
.
size
();
++
i
)
{
if
(
node_in
->
outputs
[
i
]
==
node
)
{
node_in
->
outputs
.
erase
(
node_in
->
outputs
.
begin
()
+
i
);
break
;
}
}
}
for
(
auto
*
node_out
:
node
->
outputs
)
{
for
(
size_t
i
=
0
;
i
<
node_out
->
inputs
.
size
();
++
i
)
{
if
(
node_out
->
inputs
[
i
]
==
node
)
{
node_out
->
inputs
.
erase
(
node_out
->
inputs
.
begin
()
+
i
);
break
;
}
}
}
VLOG
(
10
)
<<
"
\t
"
<<
node
->
Name
();
graph
->
RemoveNode
(
node
);
}
VLOG
(
10
)
<<
"Post Graph: "
;
VLOG
(
10
)
<<
DebugString
(
graph
);
VLOG
(
10
)
<<
"leave ForwardGraphExtractPass::ApplyImpl"
;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
REGISTER_PASS
(
forward_graph_extract_pass
,
paddle
::
framework
::
ir
::
ForwardGraphExtractPass
);
paddle/fluid/framework/ir/ipu/forward_graph_extract_pass.h
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/ipu/ipu_pass_base.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
class
ForwardGraphExtractPass
:
public
IPUPassBase
{
protected:
void
ApplyImpl
(
ir
::
Graph
*
graph
)
const
override
;
};
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/ipu/infer_shape_pass.cc
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/ipu/infer_shape_pass.h"
#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/variable_helper.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
void
InferShapePass
::
ApplyImpl
(
ir
::
Graph
*
graph
)
const
{
VLOG
(
10
)
<<
"enter InferShapePass::ApplyImpl"
;
VLOG
(
10
)
<<
"Raw Graph: "
;
VLOG
(
10
)
<<
DebugString
(
graph
);
std
::
shared_ptr
<
platform
::
ipu
::
IpuBackend
>
ipu_backend
=
platform
::
ipu
::
IpuBackend
::
GetInstance
();
auto
batch_size
=
ipu_backend
->
GetIpuStrategy
()
->
batch_size
;
auto
feed_list
=
Get
<
std
::
vector
<
std
::
string
>>
(
"feed_list"
);
for
(
auto
node
:
graph
->
Nodes
())
{
if
(
!
node
->
IsVar
())
{
continue
;
}
bool
is_feed
=
std
::
find
(
feed_list
.
begin
(),
feed_list
.
end
(),
node
->
Name
())
!=
feed_list
.
end
();
if
(
is_feed
)
{
auto
input_shape
=
node
->
Var
()
->
GetShape
();
if
(
input_shape
[
0
]
<=
-
1
)
{
input_shape
[
0
]
=
batch_size
;
node
->
Var
()
->
SetShape
(
input_shape
);
}
// int64->int32
if
(
node
->
Var
()
->
GetDataType
()
==
proto
::
VarType
::
INT64
)
{
node
->
Var
()
->
SetDataType
(
proto
::
VarType
::
INT32
);
}
}
}
// temp scope for shape inference
std
::
shared_ptr
<
paddle
::
framework
::
Scope
>
scope
(
new
paddle
::
framework
::
Scope
());
for
(
auto
node
:
graph
->
Nodes
())
{
if
(
!
node
->
IsVar
())
{
continue
;
}
auto
var_desc
=
node
->
Var
();
auto
*
ptr
=
scope
->
Var
(
var_desc
->
Name
());
paddle
::
framework
::
InitializeVariable
(
ptr
,
var_desc
->
GetType
());
auto
tensor
=
ptr
->
GetMutable
<
paddle
::
framework
::
LoDTensor
>
();
tensor
->
Resize
(
paddle
::
framework
::
make_ddim
(
var_desc
->
GetShape
()));
}
// infer shape
auto
nodes
=
ir
::
TopologySortOperations
(
*
graph
);
for
(
auto
node
:
nodes
)
{
auto
op_desc
=
node
->
Op
();
auto
op
=
paddle
::
framework
::
OpRegistry
::
CreateOp
(
*
op_desc
);
paddle
::
framework
::
RuntimeContext
ctx
(
op
->
Inputs
(),
op
->
Outputs
(),
*
scope
);
op
->
RuntimeInferShape
(
*
scope
,
paddle
::
platform
::
CPUPlace
(),
ctx
);
for
(
auto
it
=
ctx
.
outputs
.
begin
();
it
!=
ctx
.
outputs
.
end
();
it
++
)
{
for
(
int
i
=
0
;
i
<
it
->
second
.
size
();
i
++
)
{
auto
output_name
=
op_desc
->
Output
(
it
->
first
)[
i
];
auto
dim
=
it
->
second
[
i
]
->
GetMutable
<
paddle
::
framework
::
LoDTensor
>
()
->
dims
();
auto
new_shape
=
paddle
::
framework
::
vectorize
(
dim
);
for
(
auto
output_node
:
node
->
outputs
)
{
if
(
output_node
->
Name
()
==
output_name
)
{
output_node
->
Var
()
->
SetShape
(
new_shape
);
}
}
}
}
}
// release the temp scope
scope
.
reset
();
VLOG
(
10
)
<<
"Post Graph: "
;
VLOG
(
10
)
<<
DebugString
(
graph
);
VLOG
(
10
)
<<
"leave InferShapePass::ApplyImpl"
;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
REGISTER_PASS
(
infer_shape_pass
,
paddle
::
framework
::
ir
::
InferShapePass
)
.
RequirePassAttr
(
"feed_list"
);
paddle/fluid/framework/ir/ipu/infer_shape_pass.h
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "paddle/fluid/framework/ir/ipu/ipu_pass_base.h"

namespace paddle {
namespace framework {
namespace ir {

// Infers static shapes for all graph variables before IPU compilation.
// Requires the "feed_list" pass attribute (std::vector<std::string>).
class InferShapePass : public IPUPassBase {
 protected:
  void ApplyImpl(ir::Graph* graph) const override;
};

}  // namespace ir
}  // namespace framework
}  // namespace paddle
paddle/fluid/framework/ir/ipu/inference_postprocess_pass.cc
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/ipu/inference_postprocess_pass.h"
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
#include "paddle/fluid/platform/device/ipu/ipu_strategy.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
void
InferencePostprocessPass
::
ApplyImpl
(
ir
::
Graph
*
graph
)
const
{
VLOG
(
10
)
<<
"enter InferencePostprocessPass::ApplyImpl"
;
std
::
vector
<
std
::
string
>
feed_list
;
feed_list
=
Get
<
std
::
vector
<
std
::
string
>>
(
"feed_list"
);
std
::
vector
<
std
::
string
>
fetch_list
;
fetch_list
=
Get
<
std
::
vector
<
std
::
string
>>
(
"fetch_list"
);
auto
*
feed_var
=
new
paddle
::
framework
::
VarDesc
(
"feed"
);
feed_var
->
SetType
(
proto
::
VarType
::
FEED_MINIBATCH
);
auto
*
feed_var_node
=
graph
->
CreateVarNode
(
feed_var
);
auto
*
fetch_var
=
new
paddle
::
framework
::
VarDesc
(
"fetch"
);
fetch_var
->
SetType
(
proto
::
VarType
::
FETCH_LIST
);
auto
*
fetch_var_node
=
graph
->
CreateVarNode
(
fetch_var
);
for
(
int
i
=
0
;
i
<
feed_list
.
size
();
i
++
)
{
for
(
auto
node
:
graph
->
Nodes
())
{
if
(
node
->
Name
()
==
feed_list
[
i
])
{
auto
*
op
=
new
paddle
::
framework
::
OpDesc
();
op
->
SetType
(
"feed"
);
op
->
SetInput
(
"X"
,
{
"feed"
});
op
->
SetOutput
(
"Out"
,
{
node
->
Name
()});
op
->
SetAttr
(
"col"
,
i
);
auto
*
op_node
=
graph
->
CreateOpNode
(
op
);
node
->
inputs
.
push_back
(
op_node
);
op_node
->
outputs
.
push_back
(
node
);
feed_var_node
->
outputs
.
push_back
(
op_node
);
op_node
->
inputs
.
push_back
(
feed_var_node
);
break
;
}
}
}
for
(
int
i
=
0
;
i
<
fetch_list
.
size
();
i
++
)
{
for
(
auto
node
:
graph
->
Nodes
())
{
if
(
node
->
Name
()
==
fetch_list
[
i
])
{
auto
*
op
=
new
paddle
::
framework
::
OpDesc
();
op
->
SetType
(
"fetch"
);
op
->
SetInput
(
"X"
,
{
node
->
Name
()});
op
->
SetOutput
(
"Out"
,
{
"fetch"
});
op
->
SetAttr
(
"col"
,
i
);
auto
*
op_node
=
graph
->
CreateOpNode
(
op
);
node
->
outputs
.
push_back
(
op_node
);
op_node
->
inputs
.
push_back
(
node
);
fetch_var_node
->
inputs
.
push_back
(
op_node
);
op_node
->
outputs
.
push_back
(
fetch_var_node
);
break
;
}
}
}
VLOG
(
10
)
<<
"leave InferencePostprocessPass::ApplyImpl"
;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
REGISTER_PASS
(
inference_postprocess_pass
,
paddle
::
framework
::
ir
::
InferencePostprocessPass
)
.
RequirePassAttr
(
"feed_list"
)
.
RequirePassAttr
(
"fetch_list"
);
paddle/fluid/framework/ir/ipu/inference_postprocess_pass.h
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "paddle/fluid/framework/ir/ipu/ipu_pass_base.h"

namespace paddle {
namespace framework {
namespace ir {

// Adds feed/fetch variables and ops back onto the compiled IPU graph.
// Requires the "feed_list" and "fetch_list" pass attributes.
class InferencePostprocessPass : public IPUPassBase {
 protected:
  void ApplyImpl(ir::Graph* graph) const override;
};

}  // namespace ir
}  // namespace framework
}  // namespace paddle
paddle/fluid/framework/ir/ipu/inference_process_pass.cc
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/ipu/inference_process_pass.h"
#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
#include "paddle/fluid/platform/device/ipu/ipu_strategy.h"
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
void
InferenceProcessPass
::
ApplyImpl
(
ir
::
Graph
*
graph
)
const
{
VLOG
(
10
)
<<
"enter InferenceProcessPass::ApplyImpl"
;
// Get a new instance of ipu_backend
std
::
shared_ptr
<
platform
::
ipu
::
IpuBackend
>
ipu_backend
=
platform
::
ipu
::
IpuBackend
::
GetNewInstance
();
// Set scope
auto
&
scope
=
graph
->
Get
<
Scope
>
(
kParamScopeAttr
);
ipu_backend
->
SetScope
(
scope
);
// Set ipu_strategy
static
std
::
shared_ptr
<
platform
::
ipu
::
IpuStrategy
>
ipu_strategy_instance_
(
new
platform
::
ipu
::
IpuStrategy
());
ipu_strategy_instance_
->
is_training
=
false
;
auto
num_ipus
=
graph
->
Get
<
int
>
(
"num_ipus"
);
ipu_strategy_instance_
->
num_ipus
=
num_ipus
;
if
(
num_ipus
>
1
)
{
ipu_strategy_instance_
->
popart_options_
.
virtualGraphMode
=
platform
::
ipu
::
VirtualGraphMode
::
Manual
;
}
else
{
ipu_strategy_instance_
->
popart_options_
.
virtualGraphMode
=
platform
::
ipu
::
VirtualGraphMode
::
Off
;
}
auto
enable_pipelining
=
graph
->
Get
<
bool
>
(
"enable_pipelining"
);
ipu_strategy_instance_
->
popart_options_
.
enablePipelining
=
enable_pipelining
;
if
(
enable_pipelining
)
{
auto
batches_per_step
=
graph
->
Get
<
int
>
(
"batches_per_step"
);
PADDLE_ENFORCE_GE
(
batches_per_step
,
num_ipus
,
platform
::
errors
::
InvalidArgument
(
"Batched per step should be equal or "
"greater than the number of IPUs"
));
ipu_strategy_instance_
->
batches_per_step
=
batches_per_step
;
}
ipu_strategy_instance_
->
batch_size
=
graph
->
Get
<
int
>
(
"batch_size"
);
ipu_strategy_instance_
->
need_avg_shard
=
graph
->
Get
<
bool
>
(
"need_avg_shard"
);
ipu_backend
->
SetIpuStrategy
(
*
(
ipu_strategy_instance_
.
get
()));
// Get feed_list and fetch list
std
::
vector
<
std
::
string
>
feed_list
=
{};
std
::
vector
<
std
::
string
>
fetch_list
=
{};
for
(
auto
node
:
graph
->
Nodes
())
{
if
(
node
->
Name
()
==
"feed"
)
{
if
(
node
->
IsOp
())
{
feed_list
.
push_back
(
""
);
}
}
else
if
(
node
->
Name
()
==
"fetch"
)
{
if
(
node
->
IsOp
())
{
fetch_list
.
push_back
(
""
);
}
}
}
for
(
auto
node
:
graph
->
Nodes
())
{
if
(
node
->
Name
()
==
"feed"
)
{
if
(
node
->
IsOp
())
{
feed_list
[
BOOST_GET_CONST
(
int
,
node
->
Op
()
->
GetAttr
(
"col"
))]
=
node
->
outputs
[
0
]
->
Name
();
}
}
else
if
(
node
->
Name
()
==
"fetch"
)
{
if
(
node
->
IsOp
())
{
fetch_list
[
BOOST_GET_CONST
(
int
,
node
->
Op
()
->
GetAttr
(
"col"
))]
=
node
->
inputs
[
0
]
->
Name
();
}
}
}
// Run passes
std
::
vector
<
std
::
string
>
graph_pass
=
{
"forward_graph_extract_pass"
,
"infer_shape_pass"
,
"avg_shard_pass"
,
"popart_canonicalization_pass"
};
std
::
vector
<
std
::
string
>
compile_pass
=
{
"ipu_inplace_pass"
,
"ipu_graph_builder_pass"
,
"ipu_runtime_replacer_pass"
,
"inference_postprocess_pass"
};
for
(
auto
pass_name
:
graph_pass
)
{
auto
pass
=
PassRegistry
::
Instance
().
Get
(
pass_name
);
if
(
pass_name
==
"infer_shape_pass"
)
{
pass
->
Set
(
"feed_list"
,
new
std
::
vector
<
std
::
string
>
(
feed_list
.
begin
(),
feed_list
.
end
()));
}
pass
->
Apply
(
graph
);
}
for
(
auto
pass_name
:
compile_pass
)
{
auto
pass
=
PassRegistry
::
Instance
().
Get
(
pass_name
);
pass
->
Set
(
"feed_list"
,
new
std
::
vector
<
std
::
string
>
(
feed_list
.
begin
(),
feed_list
.
end
()));
pass
->
Set
(
"fetch_list"
,
new
std
::
vector
<
std
::
string
>
(
fetch_list
.
begin
(),
fetch_list
.
end
()));
pass
->
Apply
(
graph
);
}
VLOG
(
10
)
<<
"leave InferenceProcessPass::ApplyImpl"
;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
REGISTER_PASS
(
inference_process_pass
,
paddle
::
framework
::
ir
::
InferenceProcessPass
);
paddle/fluid/framework/ir/ipu/inference_process_pass.h
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "paddle/fluid/framework/ir/ipu/ipu_pass_base.h"

namespace paddle {
namespace framework {
namespace ir {

// Top-level driver pass for IPU inference: sets up the IPU backend and
// strategy, then runs the remaining IPU graph/compile passes.
class InferenceProcessPass : public IPUPassBase {
 protected:
  void ApplyImpl(ir::Graph* graph) const override;
};

}  // namespace ir
}  // namespace framework
}  // namespace paddle
paddle/fluid/framework/ir/ipu/ipu_graph_builder_pass.cc
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/ipu/ipu_graph_builder_pass.h"
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
void
IpuGraphBuilderPass
::
ApplyImpl
(
ir
::
Graph
*
graph
)
const
{
VLOG
(
10
)
<<
"enter IpuGraphBuilderPass::ApplyImpl"
;
VLOG
(
10
)
<<
"Raw Graph: "
;
VLOG
(
10
)
<<
DebugString
(
graph
);
std
::
vector
<
std
::
string
>
feed_list
;
feed_list
=
Get
<
std
::
vector
<
std
::
string
>>
(
"feed_list"
);
std
::
vector
<
std
::
string
>
fetch_list
;
fetch_list
=
Get
<
std
::
vector
<
std
::
string
>>
(
"fetch_list"
);
std
::
shared_ptr
<
platform
::
ipu
::
IpuBackend
>
ipu_backend
=
platform
::
ipu
::
IpuBackend
::
GetInstance
();
ipu_backend
->
Compile
(
graph
,
feed_list
,
fetch_list
);
VLOG
(
10
)
<<
"Post Graph: "
;
VLOG
(
10
)
<<
DebugString
(
graph
);
VLOG
(
10
)
<<
"leave IpuGraphBuilderPass::ApplyImpl"
;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
REGISTER_PASS
(
ipu_graph_builder_pass
,
paddle
::
framework
::
ir
::
IpuGraphBuilderPass
)
.
RequirePassAttr
(
"feed_list"
)
.
RequirePassAttr
(
"fetch_list"
);
paddle/fluid/framework/ir/ipu/ipu_graph_builder_pass.h
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/ipu/ipu_pass_base.h"

namespace paddle {
namespace framework {
namespace ir {

// Compiles the graph with the IPU backend.
// Requires the "feed_list" and "fetch_list" pass attributes.
class IpuGraphBuilderPass : public IPUPassBase {
 protected:
  void ApplyImpl(ir::Graph* graph) const override;
};

}  // namespace ir
}  // namespace framework
}  // namespace paddle
paddle/fluid/framework/ir/ipu/ipu_inplace_pass.cc
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/ipu/ipu_inplace_pass.h"
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
std
::
string
GenerateVarName
(
Node
*
node
)
{
return
node
->
Name
()
+
"_"
+
std
::
to_string
(
node
->
id
());
}
void
IpuInplacePass
::
ApplyImpl
(
ir
::
Graph
*
graph
)
const
{
// use this pass after forward_graph_extract_pass
// raise error if the inplaced var both in feed_list & fetch_list
VLOG
(
10
)
<<
"enter IpuInplacePass::ApplyImpl"
;
VLOG
(
10
)
<<
"Raw Graph: "
;
VLOG
(
10
)
<<
DebugString
(
graph
);
std
::
vector
<
std
::
string
>
feed_list
;
feed_list
=
Get
<
std
::
vector
<
std
::
string
>>
(
"feed_list"
);
std
::
vector
<
std
::
string
>
fetch_list
;
fetch_list
=
Get
<
std
::
vector
<
std
::
string
>>
(
"fetch_list"
);
std
::
map
<
std
::
string
,
int
>
var_name
;
for
(
auto
*
node
:
graph
->
Nodes
())
{
if
(
node
->
IsVar
())
{
if
(
var_name
.
find
(
node
->
Name
())
==
var_name
.
end
())
{
var_name
.
emplace
(
node
->
Name
(),
1
);
}
else
{
var_name
[
node
->
Name
()]
++
;
}
}
}
for
(
auto
*
node
:
graph
->
Nodes
())
{
if
(
node
->
IsVar
())
{
if
(
var_name
[
node
->
Name
()]
>
1
)
{
auto
is_feed
=
(
std
::
find
(
feed_list
.
begin
(),
feed_list
.
end
(),
node
->
Name
())
!=
feed_list
.
end
())
&&
(
node
->
inputs
.
size
()
==
0
);
auto
is_fetch
=
(
std
::
find
(
fetch_list
.
begin
(),
fetch_list
.
end
(),
node
->
Name
())
!=
fetch_list
.
end
())
&&
(
node
->
outputs
.
size
()
==
0
);
if
(
!
is_feed
&&
!
is_fetch
&&
!
node
->
Var
()
->
Persistable
())
{
auto
old_name
=
node
->
Name
();
auto
new_name
=
GenerateVarName
(
node
);
node
->
RenameVar
(
new_name
);
for
(
auto
*
op_in
:
node
->
inputs
)
{
op_in
->
Op
()
->
RenameOutput
(
old_name
,
new_name
);
}
for
(
auto
*
op_out
:
node
->
outputs
)
{
op_out
->
Op
()
->
RenameInput
(
old_name
,
new_name
);
}
}
}
}
}
VLOG
(
10
)
<<
"Post Graph: "
;
VLOG
(
10
)
<<
DebugString
(
graph
);
VLOG
(
10
)
<<
"leave IpuInplacePass::ApplyImpl"
;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
REGISTER_PASS
(
ipu_inplace_pass
,
paddle
::
framework
::
ir
::
IpuInplacePass
)
.
RequirePassAttr
(
"feed_list"
)
.
RequirePassAttr
(
"fetch_list"
);
paddle/fluid/framework/ir/ipu/ipu_inplace_pass.h
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "paddle/fluid/framework/ir/ipu/ipu_pass_base.h"

namespace paddle {
namespace framework {
namespace ir {

// Makes intermediate variable names unique before popart lowering.
// Requires the "feed_list" and "fetch_list" pass attributes.
class IpuInplacePass : public IPUPassBase {
 protected:
  void ApplyImpl(ir::Graph* graph) const override;
};

}  // namespace ir
}  // namespace framework
}  // namespace paddle
paddle/fluid/framework/ir/ipu/ipu_pass_base.cc
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/ipu/ipu_pass_base.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
void
IPUPassBase
::
Init
(
const
std
::
string
&
repr
,
Graph
*
graph
)
const
{
repr_
=
repr
;
graph_
=
graph
;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/ipu/ipu_pass_base.h
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/scope.h"

namespace paddle {
namespace framework {
namespace ir {

// Common base class for IPU-related graph passes. Stores the pass
// representation and the target graph; members are mutable because the Pass
// entry points are const.
class IPUPassBase : public Pass {
 public:
  void Init(const std::string& repr, Graph* graph) const;
  virtual ~IPUPassBase() {}

 protected:
  // Graph the pass was initialized with.
  mutable Graph* graph_;
  // String representation of the pass, set by Init().
  mutable std::string repr_;
};

}  // namespace ir
}  // namespace framework
}  // namespace paddle
paddle/fluid/framework/ir/ipu/ipu_runtime_replacer_pass.cc
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/ipu/ipu_runtime_replacer_pass.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
void
IpuRuntimeReplacerPass
::
ApplyImpl
(
ir
::
Graph
*
graph
)
const
{
VLOG
(
10
)
<<
"enter IpuRuntimeReplacerPass::ApplyImpl"
;
VLOG
(
10
)
<<
"Raw Graph: "
;
VLOG
(
10
)
<<
DebugString
(
graph
);
std
::
vector
<
std
::
string
>
feed_list
;
feed_list
=
Get
<
std
::
vector
<
std
::
string
>>
(
"feed_list"
);
std
::
vector
<
std
::
string
>
fetch_list
;
fetch_list
=
Get
<
std
::
vector
<
std
::
string
>>
(
"fetch_list"
);
framework
::
OpDesc
ipu_rt_op_desc
;
ipu_rt_op_desc
.
SetType
(
"ipu_runtime"
);
ipu_rt_op_desc
.
SetInput
(
"FeedList"
,
feed_list
);
ipu_rt_op_desc
.
SetOutput
(
"FetchList"
,
fetch_list
);
ipu_rt_op_desc
.
Flush
();
// Create a new node for the ipu_runtime_op.
auto
*
ipu_rt_node
=
graph
->
CreateOpNode
(
&
ipu_rt_op_desc
);
for
(
auto
*
node
:
graph
->
Nodes
())
{
if
(
node
->
IsVar
())
{
for
(
auto
feed
:
feed_list
)
{
if
(
node
->
Name
()
==
feed
)
{
IR_NODE_LINK_TO
(
node
,
ipu_rt_node
);
}
}
for
(
auto
fetch
:
fetch_list
)
{
if
(
node
->
Name
()
==
fetch
)
{
IR_NODE_LINK_TO
(
ipu_rt_node
,
node
);
}
}
}
}
// set ipu_runtime_op dtype attr
if
(
fetch_list
.
size
()
==
1
)
{
for
(
auto
*
node
:
graph
->
Nodes
())
{
if
(
node
->
IsVar
())
{
for
(
auto
fetch
:
fetch_list
)
{
if
(
node
->
Name
()
==
fetch
)
{
ipu_rt_node
->
Op
()
->
SetAttr
(
"dtype"
,
node
->
Var
()
->
GetDataType
());
}
}
}
}
}
// Remove unneeded nodes.
std
::
unordered_set
<
const
Node
*>
marked_nodes
;
for
(
auto
*
node
:
graph
->
Nodes
())
{
if
(
node
->
IsOp
())
{
auto
*
op_desc
=
node
->
Op
();
if
(
op_desc
->
Type
()
!=
"ipu_runtime"
)
{
marked_nodes
.
insert
(
node
);
}
}
}
GraphSafeRemoveNodes
(
graph
,
marked_nodes
);
VLOG
(
10
)
<<
"Post Graph: "
;
VLOG
(
10
)
<<
DebugString
(
graph
);
VLOG
(
10
)
<<
"leave IpuRuntimeReplacerPass::ApplyImpl"
;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
REGISTER_PASS
(
ipu_runtime_replacer_pass
,
paddle
::
framework
::
ir
::
IpuRuntimeReplacerPass
)
.
RequirePassAttr
(
"feed_list"
)
.
RequirePassAttr
(
"fetch_list"
);
paddle/fluid/framework/ir/ipu/ipu_runtime_replacer_pass.h
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/ipu/ipu_pass_base.h"

namespace paddle {
namespace framework {
namespace ir {

// Replaces the whole graph's ops with a single ipu_runtime op.
// Requires the "feed_list" and "fetch_list" pass attributes.
class IpuRuntimeReplacerPass : public IPUPassBase {
 protected:
  void ApplyImpl(ir::Graph* graph) const override;
};

}  // namespace ir
}  // namespace framework
}  // namespace paddle
paddle/fluid/framework/ir/ipu/optimizer_extract_pass.cc
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/ipu/optimizer_extract_pass.h"
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
void
IpuOptimizerExtractPass
::
ApplyImpl
(
ir
::
Graph
*
graph
)
const
{
VLOG
(
10
)
<<
"enter IpuOptimizerExtractPass::ApplyImpl"
;
VLOG
(
10
)
<<
"Raw Graph: "
;
VLOG
(
10
)
<<
DebugString
(
graph
);
auto
ipu_backend
=
paddle
::
platform
::
ipu
::
IpuBackend
::
GetInstance
();
for
(
auto
*
node
:
graph
->
Nodes
())
{
if
(
node
->
IsOp
()
&&
node
->
Op
())
{
int
op_role
=
BOOST_GET_CONST
(
int
,
node
->
Op
()
->
GetAttr
(
framework
::
OpProtoAndCheckerMaker
::
OpRoleAttrName
()));
// graph usually have multiple optimizer node for different parameter,
// and these node have the same type and attr value usually
if
((
op_role
==
static_cast
<
int
>
(
framework
::
OpRole
::
kOptimize
)))
{
ipu_backend
->
GetExecutor
().
SetOptimizerType
(
node
->
Op
()
->
Type
());
VLOG
(
10
)
<<
"found optimizer type: "
<<
node
->
Op
()
->
Type
();
for
(
const
std
::
string
&
attr_name
:
node
->
Op
()
->
AttrNames
())
{
auto
attr_type
=
node
->
Op
()
->
GetAttrType
(
attr_name
);
// with adam, attr are float
if
(
attr_type
==
proto
::
AttrType
::
FLOAT
)
{
auto
attr_value
=
BOOST_GET_CONST
(
float
,
node
->
Op
()
->
GetAttr
(
attr_name
));
ipu_backend
->
GetExecutor
().
SetOptimizerAttr
(
attr_name
,
attr_value
);
}
else
{
VLOG
(
10
)
<<
"Skip "
<<
attr_type
;
}
}
auto
lr_var_name
=
node
->
Op
()
->
Input
(
"LearningRate"
);
PADDLE_ENFORCE_EQ
(
lr_var_name
.
size
(),
1u
,
platform
::
errors
::
InvalidArgument
(
"In op(%s), find input(LearningRate) failed."
,
node
->
Op
()
->
Type
()));
ipu_backend
->
GetExecutor
().
SetLRVarName
(
lr_var_name
[
0
]);
}
if
((
op_role
==
static_cast
<
int
>
(
framework
::
OpRole
::
kLoss
)))
{
VLOG
(
10
)
<<
"found loss op type: "
<<
node
->
Op
()
->
Type
();
auto
outputs
=
node
->
Op
()
->
Outputs
();
PADDLE_ENFORCE_EQ
(
outputs
.
size
(),
1
,
platform
::
errors
::
InvalidArgument
(
"Can only support one loss key"
));
auto
losses_name
=
outputs
.
begin
()
->
second
;
PADDLE_ENFORCE_EQ
(
losses_name
.
size
(),
1
,
platform
::
errors
::
InvalidArgument
(
"Can only support one loss name"
));
ipu_backend
->
GetExecutor
().
SetLoss
(
losses_name
[
0
]);
}
}
}
VLOG
(
10
)
<<
"Post Graph: "
;
VLOG
(
10
)
<<
DebugString
(
graph
);
VLOG
(
10
)
<<
"leave IpuOptimizerExtractPass::ApplyImpl"
;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
REGISTER_PASS
(
optimizer_extract_pass
,
paddle
::
framework
::
ir
::
IpuOptimizerExtractPass
);
paddle/fluid/framework/ir/ipu/optimizer_extract_pass.h
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/ipu/ipu_pass_base.h"

namespace paddle {
namespace framework {
namespace ir {

// Extracts optimizer and loss configuration from the graph and forwards it
// to the IPU backend executor.
class IpuOptimizerExtractPass : public IPUPassBase {
 protected:
  void ApplyImpl(ir::Graph* graph) const override;
};

}  // namespace ir
}  // namespace framework
}  // namespace paddle
paddle/fluid/framework/ir/ipu/optimizer_state_align_pass.cc
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/ipu/optimizer_state_align_pass.h"
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
#include "paddle/fluid/platform/device/ipu/common.h"
#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
namespace paddle {
namespace framework {
namespace ir {

using paddle::platform::ipu::IpuBackend;
using framework::ir::Graph;
using framework::ir::Node;

// Rewrites the beta1_pow state of bias-corrected optimizers (Adam/Lamb) so
// that it stores the step count instead of beta1^(step+1), because popart
// only supports a single Step__ state.
void IpuOptimizerStateAlignPass::ApplyImpl(ir::Graph* graph) const {
  VLOG(10) << "enter IpuOptimizerStateAlignPass::ApplyImpl";
  VLOG(10) << "Raw Graph: ";
  VLOG(10) << DebugString(graph);

  auto ipu_backend = IpuBackend::GetInstance();
  const auto* scope = ipu_backend->GetScope();

  for (auto* node : graph->Nodes()) {
    // Only operator nodes with an attached OpDesc are of interest.
    if (!node->IsOp() || !node->Op()) {
      continue;
    }
    int op_role = BOOST_GET_CONST(
        int,
        node->Op()->GetAttr(framework::OpProtoAndCheckerMaker::OpRoleAttrName()));
    if (op_role != static_cast<int>(framework::OpRole::kOptimize)) {
      continue;
    }

    auto inputs = node->Op()->Inputs();
    if (!inputs.count(platform::ipu::sBeta1Pow)) {
      continue;
    }

    // Read the current beta1_pow value and the beta1 attribute of the op.
    auto var = scope->GetVar(inputs.at(platform::ipu::sBeta1Pow)[0]);
    auto data = var->GetMutable<framework::LoDTensor>()->data<float>();
    auto beta =
        BOOST_GET_CONST(float, node->Op()->GetAttr(platform::ipu::sBeta1));

    // ensure current save with beta1pow, rather than step.
    // beta1pow = beta1 ^ (step + 1). Just set beta1pow because popart
    // support single Step__
    const bool save_with_beta1pow = (data[0] < 1.0f) && (data[0] > 0.0f);
    // Recover `step` from beta1pow by repeated multiplication:
    // smallest step with beta1^(step+1) <= data[0].
    float step = 0;
    float beta_acc = beta;
    while (beta_acc > data[0] && save_with_beta1pow) {
      beta_acc *= beta;
      step += 1;
    }
    if (save_with_beta1pow) {
      data[0] = step;
    }
  }

  VLOG(10) << "Post Graph: ";
  VLOG(10) << DebugString(graph);
  VLOG(10) << "leave IpuOptimizerStateAlignPass::ApplyImpl";
}

}  // namespace ir
}  // namespace framework
}  // namespace paddle

REGISTER_PASS(optimizer_state_align_pass,
              paddle::framework::ir::IpuOptimizerStateAlignPass);
paddle/fluid/framework/ir/ipu/optimizer_state_align_pass.h
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/ipu/ipu_pass_base.h"
namespace paddle {
namespace framework {
namespace ir {

/*
 * This pass should only affect optimizers that need bias correction,
 * such as Adam/Lamb. It aligns the optimizer state (beta1_pow) held in
 * the scope with what the IPU/popart backend expects.
 */
class IpuOptimizerStateAlignPass : public IPUPassBase {
 protected:
  void ApplyImpl(ir::Graph* graph) const override;
};

}  // namespace ir
}  // namespace framework
}  // namespace paddle
// namespace paddle
paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.cc
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.h"
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h"
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/post_canonicalization.h"
namespace paddle {
namespace framework {
namespace ir {

using framework::ir::Graph;
using framework::ir::Node;
using platform::ipu::SymbolHandler;

// Replaces every op node that has a registered SymbolHandler with its
// popart-canonical equivalent; the original node is cleared and removed.
void PopartCanonicalizationPass::ApplyImpl(ir::Graph* graph) const {
  VLOG(10) << "enter PopartCanonicalizationPass::ApplyImpl";
  VLOG(10) << "Raw Graph: ";
  VLOG(10) << DebugString(graph);

  // Iterate over a copy of the node set: nodes are removed from the graph
  // inside the loop.
  auto nodes = graph->Nodes();
  for (auto* node : nodes) {
    if (!node->IsOp()) {
      continue;
    }
    auto* op = node->Op();
    auto op_type = op->Type();

    ir::Node* new_node = nullptr;
    SymbolHandler handler = platform::ipu::GetHandler(op_type);
    if (!handler) {
      LOG(ERROR) << "Can not find OpHandler for op_type: " << op_type;
      continue;
    }

    VLOG(11) << "Raw Paddle Node:";
    VLOG(11) << node->Op()->Proto()->DebugString();
    new_node = handler(graph, node);
    VLOG(11) << "Post Popart Node:";
    // NOTE(review): new_node is dereferenced without a null check — this
    // assumes every handler returns a valid node; confirm against the
    // SymbolHandler contract.
    VLOG(11) << new_node->Op()->Proto()->DebugString();
    platform::ipu::ClearNode(node);
    graph->RemoveNode(node);
  }

  VLOG(10) << "Post Graph: ";
  VLOG(10) << DebugString(graph);
  VLOG(10) << "leave PopartCanonicalizationPass::ApplyImpl";
}

}  // namespace ir
}  // namespace framework
}  // namespace paddle

REGISTER_PASS(popart_canonicalization_pass,
              paddle::framework::ir::PopartCanonicalizationPass);
paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.h
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/ipu/ipu_pass_base.h"
namespace paddle {
namespace framework {
namespace ir {

// Graph pass that converts Paddle op nodes into their popart-canonical
// form: each op with a registered platform::ipu::SymbolHandler is replaced
// by the handler's output node (see popart_canonicalization_pass.cc).
class PopartCanonicalizationPass : public IPUPassBase {
 protected:
  void ApplyImpl(ir::Graph* graph) const override;
};

}  // namespace ir
}  // namespace framework
}  // namespace paddle
paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc
浏览文件 @
883ee1a3
...
...
@@ -84,13 +84,16 @@ void TransposeFlattenConcatFusePass::RunTransposeFlattenConcatFuse(
LOG
(
WARNING
)
<<
"Pass in op compat failed."
;
return
;
}
const
int
kNumFields
=
5
;
const
int
kTransOffset
=
1
;
const
int
kTransOutOffset
=
2
;
const
int
kFlattenOffset
=
3
;
const
int
kFlattenOutOffset
=
4
;
std
::
vector
<
Node
*>
nodes
;
std
::
vector
<
Node
*>
nodes
;
std
::
vector
<
int
>
trans_axis0
;
int
flatten_axis0
;
for
(
int
i
=
0
;
i
<
times
;
i
++
)
{
PADDLE_ENFORCE_NOT_NULL
(
subgraph
.
at
(
pattern
.
GetPDNode
(
"transpose"
+
std
::
to_string
(
i
))),
...
...
@@ -112,6 +115,33 @@ void TransposeFlattenConcatFusePass::RunTransposeFlattenConcatFuse(
platform
::
errors
::
NotFound
(
"Can not find %s in subgraph."
,
input_nodes
[
i
]
->
name
()));
if
(
i
==
0
)
{
trans_axis0
=
BOOST_GET_CONST
(
std
::
vector
<
int
>
,
subgraph
.
at
(
pattern
.
GetPDNode
(
"transpose"
+
std
::
to_string
(
0
)))
->
Op
()
->
GetAttr
(
"axis"
));
flatten_axis0
=
BOOST_GET_CONST
(
int
,
subgraph
.
at
(
pattern
.
GetPDNode
(
"flatten"
+
std
::
to_string
(
0
)))
->
Op
()
->
GetAttr
(
"axis"
));
}
else
{
std
::
vector
<
int
>
trans_axis
=
BOOST_GET_CONST
(
std
::
vector
<
int
>
,
subgraph
.
at
(
pattern
.
GetPDNode
(
"transpose"
+
std
::
to_string
(
i
)))
->
Op
()
->
GetAttr
(
"axis"
));
// All axis of transpose should be the same
if
(
trans_axis0
!=
trans_axis
)
return
;
int
flatten_axis
=
BOOST_GET_CONST
(
int
,
subgraph
.
at
(
pattern
.
GetPDNode
(
"flatten"
+
std
::
to_string
(
0
)))
->
Op
()
->
GetAttr
(
"axis"
));
// All axis of flatten should be the same
if
(
flatten_axis0
!=
flatten_axis
)
return
;
}
nodes
.
push_back
(
subgraph
.
at
(
input_nodes
[
i
]));
nodes
.
push_back
(
subgraph
.
at
(
pattern
.
GetPDNode
(
"transpose"
+
std
::
to_string
(
i
))));
...
...
paddle/fluid/framework/op_registry.h
浏览文件 @
883ee1a3
...
...
@@ -327,6 +327,9 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
#define REGISTER_OP_CPU_KERNEL(op_type, ...) \
REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__)
#define REGISTER_OP_IPU_KERNEL(op_type, ...) \
REGISTER_OP_KERNEL(op_type, IPU, ::paddle::platform::IPUPlace, __VA_ARGS__)
#define REGISTER_OP_XPU_KERNEL(op_type, ...) \
REGISTER_OP_KERNEL(op_type, XPU, ::paddle::platform::XPUPlace, __VA_ARGS__)
...
...
paddle/fluid/framework/paddle2cinn/CMakeLists.txt
浏览文件 @
883ee1a3
...
...
@@ -2,7 +2,7 @@ cc_library(cinn_cache_key SRCS cinn_cache_key.cc DEPS boost graph graph_helper l
cc_library
(
build_cinn_pass SRCS build_cinn_pass.cc DEPS pass subgraph_detector graph_pattern_detector cinn_compiler errors enforce
)
cc_library
(
transform_desc SRCS transform_desc.cc DEPS proto_desc cinn
)
cc_library
(
cinn_graph_symbolization SRCS cinn_graph_symbolization.cc DEPS lod_tensor graph transform_desc cinn
)
cc_library
(
cinn_compiler SRCS cinn_compiler.cc DEPS framework_proto graph lod_tensor cinn_cache_key cinn_graph_symbolization cinn
)
cc_library
(
cinn_compiler SRCS cinn_compiler.cc DEPS framework_proto graph lod_tensor cinn_cache_key cinn_graph_symbolization cinn
cinn_launch_context
)
if
(
WITH_TESTING
)
cc_test
(
cinn_lib_test SRCS cinn_lib_test.cc DEPS cinn
)
...
...
paddle/fluid/framework/paddle2cinn/cinn_cache_key.cc
浏览文件 @
883ee1a3
...
...
@@ -29,55 +29,32 @@ namespace paddle {
namespace
framework
{
namespace
paddle2cinn
{
using
GraphHashStrategy
=
CinnCacheKey
::
GraphHashStrategy
;
CinnCacheKey
::
CinnCacheKey
(
GraphHashStrategy
graph_hash
)
:
graph_hash_
(
graph_hash
)
{}
CinnCacheKey
::
CinnCacheKey
(
const
ir
::
Graph
&
graph
,
const
std
::
map
<
std
::
string
,
const
LoDTensor
*>&
input_tensors
,
const
std
::
string
&
arch_str
)
{
const
std
::
string
&
arch_str
,
GraphHashStrategy
graph_hash
)
:
graph_hash_
(
graph_hash
)
{
this
->
SetKey
(
graph
,
input_tensors
,
arch_str
);
}
CinnCacheKey
::
CinnCacheKey
(
const
ir
::
Graph
&
graph
,
const
std
::
map
<
std
::
string
,
DDim
>&
input_shapes
,
const
std
::
string
&
arch_str
)
{
const
std
::
string
&
arch_str
,
GraphHashStrategy
graph_hash
)
:
graph_hash_
(
graph_hash
)
{
this
->
SetKey
(
graph
,
input_shapes
,
arch_str
);
}
size_t
CinnCacheKey
::
HashGraph
(
const
ir
::
Graph
&
graph
)
{
// using Dot to unqiue graph
inference
::
analysis
::
Dot
dot
;
std
::
unordered_map
<
const
ir
::
Node
*
,
std
::
string
>
node2dot
;
int
id
=
0
;
// Create nodes
// graph.Nodes() return unordered_set, the same graph may
// return different result?
for
(
const
ir
::
Node
*
n
:
graph
.
Nodes
())
{
std
::
string
node_id
=
std
::
to_string
(
id
++
);
dot
.
AddNode
(
node_id
,
{},
n
->
Name
(),
true
);
node2dot
[
n
]
=
node_id
;
}
// Create edges
for
(
const
ir
::
Node
*
n
:
graph
.
Nodes
())
{
const
auto
&
src_id
=
node2dot
.
at
(
n
);
for
(
auto
*
out
:
n
->
outputs
)
{
const
auto
&
dest_id
=
node2dot
.
at
(
out
);
dot
.
AddEdge
(
src_id
,
dest_id
,
{});
}
}
const
std
::
string
&
viz_graph
=
dot
.
Build
();
VLOG
(
1
)
<<
"The hash graph:
\n
"
<<
viz_graph
;
size_t
hash_val
=
std
::
hash
<
std
::
string
>
()(
viz_graph
);
VLOG
(
4
)
<<
"The graph's hash value is: "
<<
hash_val
;
return
hash_val
;
}
void
CinnCacheKey
::
SetKey
(
const
ir
::
Graph
&
graph
,
const
std
::
map
<
std
::
string
,
const
LoDTensor
*>&
input_tensors
,
const
std
::
string
&
arch_str
)
{
graph_
serialize_str_
=
std
::
to_string
(
HashGraph
(
graph
)
);
graph_
hash_val_
=
graph_hash_
(
graph
);
for
(
const
auto
&
name_tensor
:
input_tensors
)
{
input_shapes_
[
name_tensor
.
first
]
=
name_tensor
.
second
->
dims
();
}
...
...
@@ -87,7 +64,7 @@ void CinnCacheKey::SetKey(
void
CinnCacheKey
::
SetKey
(
const
ir
::
Graph
&
graph
,
const
std
::
map
<
std
::
string
,
DDim
>&
input_shapes
,
const
std
::
string
&
arch_str
)
{
graph_
serialize_str_
=
std
::
to_string
(
HashGraph
(
graph
)
);
graph_
hash_val_
=
graph_hash_
(
graph
);
input_shapes_
=
input_shapes
;
arch_str_
=
arch_str
;
}
...
...
@@ -97,7 +74,7 @@ bool CinnCacheKey::operator!=(const CinnCacheKey& other) const {
}
bool
CinnCacheKey
::
operator
==
(
const
CinnCacheKey
&
other
)
const
{
return
graph_
serialize_str_
==
other
.
graph_serialize_str
_
&&
return
graph_
hash_val_
==
other
.
graph_hash_val
_
&&
input_shapes_
==
other
.
input_shapes_
&&
arch_str_
==
other
.
arch_str_
;
}
...
...
@@ -114,11 +91,48 @@ size_t CinnCacheKey::Hash::operator()(const CinnCacheKey& key) const {
ret
=
hash_combine
(
ret
,
string_hasher
(
name_shape
.
second
.
to_str
()));
}
ret
=
hash_combine
(
ret
,
string_hasher
(
key
.
graph_serialize_str_
)
);
ret
=
hash_combine
(
ret
,
key
.
graph_hash_val_
);
ret
=
hash_combine
(
ret
,
string_hasher
(
key
.
arch_str_
));
return
ret
;
}
size_t
CinnCacheKeyByStructure
::
HashGraph
(
const
ir
::
Graph
&
graph
)
{
// sort grad node by name and id.
auto
compare
=
[](
ir
::
Node
*
n1
,
ir
::
Node
*
n2
)
{
return
(
n1
->
Name
()
==
n2
->
Name
())
?
(
n1
->
id
()
<
n2
->
id
())
:
(
n1
->
Name
()
<
n2
->
Name
());
};
// graph.Nodes() return unordered_set, here using set to avoid the same graph
// may return different result
std
::
set
<
ir
::
Node
*
,
bool
(
*
)(
ir
::
Node
*
,
ir
::
Node
*
)
>
node_set
(
compare
),
output_set
(
compare
);
node_set
.
insert
(
graph
.
Nodes
().
begin
(),
graph
.
Nodes
().
end
());
std
::
string
hash_str
;
for
(
ir
::
Node
*
n
:
node_set
)
{
hash_str
.
append
(
n
->
Name
());
output_set
.
clear
();
output_set
.
insert
(
n
->
outputs
.
begin
(),
n
->
outputs
.
end
());
for
(
auto
*
out
:
output_set
)
{
hash_str
.
append
(
out
->
Name
());
}
}
VLOG
(
1
)
<<
"The hash graph:
\n
"
<<
hash_str
;
size_t
hash_val
=
std
::
hash
<
std
::
string
>
()(
hash_str
);
VLOG
(
4
)
<<
"The graph's hash value by graph structure is: "
<<
hash_val
;
return
hash_val
;
}
size_t
CinnCacheKeyByAddress
::
HashGraph
(
const
ir
::
Graph
&
graph
)
{
size_t
hash_val
=
reinterpret_cast
<
size_t
>
(
&
graph
);
VLOG
(
4
)
<<
"The graph's hash value by graph address is: "
<<
hash_val
;
return
hash_val
;
}
}
// namespace paddle2cinn
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/paddle2cinn/cinn_cache_key.h
浏览文件 @
883ee1a3
...
...
@@ -14,6 +14,7 @@
#pragma once
#include <functional>
#include <map>
#include "paddle/fluid/framework/ddim.h"
...
...
@@ -33,14 +34,18 @@ namespace paddle2cinn {
// shapes.
class
CinnCacheKey
{
public:
using
GraphHashStrategy
=
std
::
function
<
size_t
(
const
ir
::
Graph
&
)
>
;
explicit
CinnCacheKey
(
GraphHashStrategy
graph_hash
);
CinnCacheKey
(
const
ir
::
Graph
&
graph
,
const
std
::
map
<
std
::
string
,
const
LoDTensor
*>&
input_tensors
,
const
std
::
string
&
arch_str
);
const
std
::
string
&
arch_str
,
GraphHashStrategy
graph_hash
);
CinnCacheKey
(
const
ir
::
Graph
&
graph
,
const
std
::
map
<
std
::
string
,
DDim
>&
input_shapes
,
const
std
::
string
&
arch_str
);
const
std
::
string
&
arch_str
,
GraphHashStrategy
graph_hash
);
~
CinnCacheKey
()
{}
~
CinnCacheKey
()
=
default
;
void
SetKey
(
const
ir
::
Graph
&
graph
,
const
std
::
map
<
std
::
string
,
const
LoDTensor
*>&
input_tensors
,
...
...
@@ -58,13 +63,38 @@ class CinnCacheKey {
};
private:
size_t
HashGraph
(
const
ir
::
Graph
&
graph
);
std
::
string
graph_serialize_str_
;
GraphHashStrategy
graph_hash_
;
size_t
graph_hash_val_
;
std
::
map
<
std
::
string
,
DDim
>
input_shapes_
;
std
::
string
arch_str_
;
};
#define CINN_CACHE_KEY_CREATE(NAME) \
class NAME : public CinnCacheKey { \
public: \
NAME() : CinnCacheKey(HashGraph) {} \
\
NAME(const ir::Graph& graph, \
const std::map<std::string, const LoDTensor*>& input_tensors, \
const std::string& arch_str) \
: CinnCacheKey(graph, input_tensors, arch_str, HashGraph) {} \
\
NAME(const ir::Graph& graph, \
const std::map<std::string, DDim>& input_shapes, \
const std::string& arch_str) \
: CinnCacheKey(graph, input_shapes, arch_str, HashGraph) {} \
\
private: \
static size_t HashGraph(const ir::Graph& graph); \
};
// Class to store the keys by graph address for compiling CINN.
CINN_CACHE_KEY_CREATE
(
CinnCacheKeyByAddress
)
// Class to store the keys by graph structure for compiling CINN.
CINN_CACHE_KEY_CREATE
(
CinnCacheKeyByStructure
)
#undef CINN_CACHE_KEY_CREATE
}
// namespace paddle2cinn
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/paddle2cinn/cinn_cache_key_test.cc
浏览文件 @
883ee1a3
...
...
@@ -26,8 +26,8 @@ namespace paddle {
namespace
framework
{
namespace
paddle2cinn
{
TEST
(
CinnCacheKeyTest
,
TestAsUnorderedKey
)
{
std
::
unordered_set
<
CinnCacheKey
,
CinnCacheKey
::
Hash
>
test_set
;
TEST
(
CinnCacheKeyTest
,
TestAsUnorderedKey
ByStructure
)
{
std
::
unordered_set
<
CinnCacheKey
ByStructure
,
CinnCacheKey
::
Hash
>
test_set
;
ProgramDesc
empty_program
;
ir
::
Graph
empty_graph
(
empty_program
);
...
...
@@ -47,19 +47,20 @@ TEST(CinnCacheKeyTest, TestAsUnorderedKey) {
DDim
ddim
=
paddle
::
framework
::
make_ddim
({
1
,
2
,
3
});
std
::
map
<
std
::
string
,
DDim
>
feed_shapes
=
{{
"X"
,
ddim
}};
CinnCacheKey
cache_key0
(
empty_graph
,
feed_tensors
,
"x86"
);
CinnCacheKey
cache_key1
(
empty_graph
,
feed_shapes
,
"x86"
);
CinnCacheKey
ByStructure
cache_key0
(
empty_graph
,
feed_tensors
,
"x86"
);
CinnCacheKey
ByStructure
cache_key1
(
empty_graph
,
feed_shapes
,
"x86"
);
EXPECT_EQ
(
cache_key0
,
cache_key1
);
CinnCacheKey
cache_key2
(
graph
,
feed_shapes
,
"x86"
);
CinnCacheKey
cache_key3
(
graph
,
feed_shapes
,
"nvgpu"
);
CinnCacheKey
cache_key4
(
graph
,
feed_tensors
,
"nvgpu"
);
CinnCacheKey
ByStructure
cache_key2
(
graph
,
feed_shapes
,
"x86"
);
CinnCacheKey
ByStructure
cache_key3
(
graph
,
feed_shapes
,
"nvgpu"
);
CinnCacheKey
ByStructure
cache_key4
(
graph
,
feed_tensors
,
"nvgpu"
);
EXPECT_NE
(
cache_key2
,
cache_key3
);
EXPECT_EQ
(
cache_key3
,
cache_key4
);
CinnCacheKey
cache_key5
(
empty_graph
,
std
::
map
<
std
::
string
,
const
LoDTensor
*>
(),
"unk"
);
CinnCacheKey
cache_key6
(
empty_graph
,
std
::
map
<
std
::
string
,
DDim
>
(),
"unk"
);
CinnCacheKeyByStructure
cache_key5
(
empty_graph
,
std
::
map
<
std
::
string
,
const
LoDTensor
*>
(),
"unk"
);
CinnCacheKeyByStructure
cache_key6
(
empty_graph
,
std
::
map
<
std
::
string
,
DDim
>
(),
"unk"
);
EXPECT_EQ
(
cache_key5
,
cache_key6
);
EXPECT_NE
(
cache_key1
,
cache_key3
);
...
...
@@ -98,6 +99,107 @@ TEST(CinnCacheKeyTest, TestAsUnorderedKey) {
EXPECT_EQ
(
test_set
.
find
(
cache_key6
),
test_set
.
end
());
}
TEST
(
CinnCacheKeyTest
,
TestAsUnorderedKeyByAddress
)
{
std
::
unordered_set
<
CinnCacheKeyByAddress
,
CinnCacheKey
::
Hash
>
test_set
;
ProgramDesc
empty_program
;
ir
::
Graph
empty_graph
(
empty_program
);
ProgramDesc
program
;
auto
*
global_block
=
program
.
MutableBlock
(
0
);
auto
*
x
=
global_block
->
Var
(
"X"
);
x
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
ir
::
Graph
graph
(
program
);
LoDTensor
tensor
;
tensor
.
Resize
({
1
,
2
,
3
});
const
LoDTensor
*
tensor_pointer
=
&
tensor
;
std
::
map
<
std
::
string
,
const
LoDTensor
*>
feed_tensors
=
{
{
"X"
,
tensor_pointer
}};
DDim
ddim
=
paddle
::
framework
::
make_ddim
({
1
,
2
,
3
});
std
::
map
<
std
::
string
,
DDim
>
feed_shapes
=
{{
"X"
,
ddim
}};
CinnCacheKeyByAddress
cache_key0
(
empty_graph
,
feed_tensors
,
"x86"
);
CinnCacheKeyByAddress
cache_key1
(
empty_graph
,
feed_shapes
,
"x86"
);
EXPECT_EQ
(
cache_key0
,
cache_key1
);
CinnCacheKeyByAddress
cache_key2
(
graph
,
feed_shapes
,
"x86"
);
CinnCacheKeyByAddress
cache_key3
(
graph
,
feed_shapes
,
"nvgpu"
);
CinnCacheKeyByAddress
cache_key4
(
graph
,
feed_tensors
,
"nvgpu"
);
EXPECT_NE
(
cache_key2
,
cache_key3
);
EXPECT_EQ
(
cache_key3
,
cache_key4
);
CinnCacheKeyByAddress
cache_key5
(
empty_graph
,
std
::
map
<
std
::
string
,
const
LoDTensor
*>
(),
"unk"
);
CinnCacheKeyByAddress
cache_key6
(
empty_graph
,
std
::
map
<
std
::
string
,
DDim
>
(),
"unk"
);
EXPECT_EQ
(
cache_key5
,
cache_key6
);
EXPECT_NE
(
cache_key1
,
cache_key3
);
EXPECT_NE
(
cache_key4
,
cache_key2
);
EXPECT_NE
(
cache_key3
,
cache_key5
);
EXPECT_NE
(
cache_key6
,
cache_key4
);
EXPECT_NE
(
cache_key5
,
cache_key1
);
EXPECT_NE
(
cache_key2
,
cache_key6
);
test_set
.
insert
(
cache_key0
);
test_set
.
insert
(
cache_key1
);
test_set
.
insert
(
cache_key3
);
test_set
.
insert
(
cache_key4
);
test_set
.
insert
(
cache_key5
);
test_set
.
insert
(
cache_key6
);
EXPECT_EQ
(
test_set
.
size
(),
3U
);
auto
iter
=
test_set
.
find
(
cache_key0
);
EXPECT_NE
(
iter
,
test_set
.
end
());
test_set
.
erase
(
iter
);
EXPECT_EQ
(
test_set
.
size
(),
2U
);
EXPECT_EQ
(
test_set
.
find
(
cache_key1
),
test_set
.
end
());
iter
=
test_set
.
find
(
cache_key3
);
EXPECT_NE
(
iter
,
test_set
.
end
());
test_set
.
erase
(
iter
);
EXPECT_EQ
(
test_set
.
size
(),
1U
);
EXPECT_EQ
(
test_set
.
find
(
cache_key4
),
test_set
.
end
());
iter
=
test_set
.
find
(
cache_key5
);
EXPECT_NE
(
iter
,
test_set
.
end
());
test_set
.
erase
(
iter
);
EXPECT_EQ
(
test_set
.
size
(),
0U
);
EXPECT_EQ
(
test_set
.
find
(
cache_key6
),
test_set
.
end
());
}
TEST
(
CinnCacheKeyTest
,
TestSameGraph
)
{
ProgramDesc
program1
;
auto
*
global_block1
=
program1
.
MutableBlock
(
0
);
auto
*
x1
=
global_block1
->
Var
(
"X"
);
x1
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
ir
::
Graph
graph1
(
program1
);
ProgramDesc
program2
;
auto
*
global_block2
=
program2
.
MutableBlock
(
0
);
auto
*
x2
=
global_block2
->
Var
(
"X"
);
x2
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
ir
::
Graph
graph2
(
program2
);
LoDTensor
tensor
;
tensor
.
Resize
({
1
,
2
,
3
});
const
LoDTensor
*
tensor_pointer
=
&
tensor
;
std
::
map
<
std
::
string
,
const
LoDTensor
*>
feed_tensors
=
{
{
"X"
,
tensor_pointer
}};
CinnCacheKeyByAddress
cache_key_by_address1
(
graph1
,
feed_tensors
,
"x86"
);
CinnCacheKeyByAddress
cache_key_by_address2
(
graph2
,
feed_tensors
,
"x86"
);
EXPECT_NE
(
cache_key_by_address1
,
cache_key_by_address2
);
CinnCacheKeyByStructure
cache_key_by_struct1
(
graph1
,
feed_tensors
,
"x86"
);
CinnCacheKeyByStructure
cache_key_by_struct2
(
graph2
,
feed_tensors
,
"x86"
);
EXPECT_EQ
(
cache_key_by_struct1
,
cache_key_by_struct2
);
}
}
// namespace paddle2cinn
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/paddle2cinn/cinn_compiler.cc
浏览文件 @
883ee1a3
...
...
@@ -41,6 +41,7 @@
#include "paddle/fluid/framework/rw_lock.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/inference/analysis/dot.h"
#include "paddle/fluid/operators/cinn/cinn_launch_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/string/string_helper.h"
...
...
@@ -68,23 +69,41 @@ const CinnCompiledObject& CinnCompiler::Compile(
const
std
::
map
<
std
::
string
,
const
LoDTensor
*>&
input_tensors
,
const
Target
&
target
,
void
*
stream
)
{
VLOG
(
1
)
<<
"-- The graph to be compiled is:
\n
"
<<
VizGraph
(
graph
);
CinnCacheKey
cur_key
(
graph
,
input_tensors
,
target
.
arch_str
());
CinnCacheKeyByAddress
cur_key_by_address
(
graph
,
input_tensors
,
target
.
arch_str
());
CinnCacheKeyByStructure
cur_key_by_struct
;
bool
exist
=
false
;
{
AutoRDLock
r_guard
{
&
rwlock_
};
exist
=
cache_
.
count
(
cur_key
)
!=
0
;
exist
=
cache_by_address_
.
count
(
cur_key_by_address
)
!=
0
;
// if cannot find graph by address, checkout whether the graph structure
// have been stored in cache.
if
(
!
exist
)
{
// generate the structure cache key
cur_key_by_struct
.
SetKey
(
graph
,
input_tensors
,
target
.
arch_str
());
// if the graph structure can be found, storing the graph address in
// cache for next query.
if
(
cache_by_struct_
.
count
(
cur_key_by_struct
)
!=
0
)
{
exist
=
true
;
cache_by_address_
[
cur_key_by_address
]
=
cache_by_struct_
.
at
(
cur_key_by_struct
).
get
();
}
}
}
if
(
!
exist
)
{
std
::
int64_t
compiled_num
=
real_compiled_num_
.
fetch_add
(
1
);
auto
compiled_res
=
CompileGraph
(
graph
,
input_tensors
,
target
,
compiled_num
,
stream
);
AutoWRLock
w_guard
{
&
rwlock_
};
if
(
!
cache_
.
count
(
cur_key
))
{
cache_
[
cur_key
]
=
std
::
move
(
compiled_res
);
if
(
!
cache_by_struct_
.
count
(
cur_key_by_struct
))
{
cache_by_address_
[
cur_key_by_address
]
=
compiled_res
.
get
();
cache_by_struct_
[
cur_key_by_struct
]
=
std
::
move
(
compiled_res
);
}
}
AutoRDLock
guard
{
&
rwlock_
};
const
auto
&
cached_boj
=
*
cache_
[
cur_key
];
const
auto
&
cached_boj
=
*
cache_
by_address_
[
cur_key_by_address
];
return
cached_boj
;
}
...
...
@@ -181,7 +200,8 @@ void CinnCompiler::Clear() {
{
AutoWRLock
guard
{
&
rwlock_
};
graphs_
.
clear
();
cache_
.
clear
();
cache_by_address_
.
clear
();
cache_by_struct_
.
clear
();
}
real_compiled_num_
.
store
(
0
);
}
...
...
@@ -217,6 +237,9 @@ std::unique_ptr<CinnCompiledObject> CinnCompiler::CompileGraph(
*
compiled_obj
=
{
std
::
move
(
graph_compiler
),
std
::
move
(
compiled_res
.
runtime_program
),
scope
,
symbol
.
var_model_to_program_map
()};
compiled_obj
->
launch_context
=
std
::
make_unique
<
operators
::
details
::
CinnLaunchContext
>
(
compiled_obj
->
paddle2cinn_varmap
,
compiled_obj
->
scope
);
return
compiled_obj
;
}
...
...
paddle/fluid/framework/paddle2cinn/cinn_compiler.h
浏览文件 @
883ee1a3
...
...
@@ -31,6 +31,13 @@
#include "paddle/fluid/platform/macros.h"
namespace
paddle
{
namespace
operators
{
namespace
details
{
class
CinnLaunchContext
;
}
// namespace details
}
// namespace operators
namespace
framework
{
namespace
paddle2cinn
{
...
...
@@ -39,6 +46,7 @@ struct CinnCompiledObject {
std
::
unique_ptr
<::
cinn
::
hlir
::
framework
::
Program
>
runtime_program
;
std
::
shared_ptr
<::
cinn
::
hlir
::
framework
::
Scope
>
scope
;
std
::
unordered_map
<
std
::
string
,
std
::
string
>
paddle2cinn_varmap
;
std
::
unique_ptr
<
operators
::
details
::
CinnLaunchContext
>
launch_context
;
};
// Entrance to use CINN.
...
...
@@ -87,9 +95,12 @@ class CinnCompiler {
void
*
stream
=
nullptr
)
const
;
std
::
unordered_map
<
std
::
string
,
std
::
unique_ptr
<
ir
::
Graph
>>
graphs_
;
std
::
unordered_map
<
CinnCacheKey
,
std
::
unique_ptr
<
CinnCompiledObject
>
,
std
::
unordered_map
<
CinnCacheKey
ByAddress
,
CinnCompiledObject
*
,
CinnCacheKey
::
Hash
>
cache_
;
cache_by_address_
;
std
::
unordered_map
<
CinnCacheKeyByStructure
,
std
::
unique_ptr
<
CinnCompiledObject
>
,
CinnCacheKey
::
Hash
>
cache_by_struct_
;
std
::
atomic_int64_t
real_compiled_num_
{
0
};
mutable
RWLock
rwlock_
;
...
...
paddle/fluid/framework/pten_utils.cc
浏览文件 @
883ee1a3
...
...
@@ -15,6 +15,7 @@ limitations under the License. */
#include <sstream>
#include "paddle/fluid/framework/pten_utils.h"
#include "paddle/pten/core/convert_utils.h"
#include "paddle/pten/core/kernel_factory.h"
#include "paddle/fluid/framework/lod_tensor.h"
...
...
@@ -190,8 +191,9 @@ KernelArgsNameMakerByOpProto::GetAttrsArgsNames() {
}
KernelSignature
KernelArgsNameMakerByOpProto
::
GetKernelSignature
()
{
return
KernelSignature
(
op_proto_
->
type
(),
GetInputArgsNames
(),
GetAttrsArgsNames
(),
GetOutputArgsNames
());
return
KernelSignature
(
pten
::
TransToPtenKernelName
(
op_proto_
->
type
()),
GetInputArgsNames
(),
GetAttrsArgsNames
(),
GetOutputArgsNames
());
}
std
::
string
KernelSignatureToString
(
const
KernelSignature
&
signature
)
{
...
...
paddle/fluid/framework/tensor_util.cc
浏览文件 @
883ee1a3
...
...
@@ -76,6 +76,22 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
memory
::
Copy
(
BOOST_GET_CONST
(
platform
::
CPUPlace
,
dst_place
),
dst_ptr
,
BOOST_GET_CONST
(
platform
::
CPUPlace
,
src_place
),
src_ptr
,
size
);
}
#ifdef PADDLE_WITH_IPU
else
if
(
platform
::
is_ipu_place
(
src_place
)
&&
// NOLINT
platform
::
is_cpu_place
(
dst_place
))
{
memory
::
Copy
(
BOOST_GET_CONST
(
platform
::
CPUPlace
,
dst_place
),
dst_ptr
,
BOOST_GET_CONST
(
platform
::
IPUPlace
,
src_place
),
src_ptr
,
size
);
}
else
if
(
platform
::
is_cpu_place
(
src_place
)
&&
platform
::
is_ipu_place
(
dst_place
))
{
memory
::
Copy
(
BOOST_GET_CONST
(
platform
::
IPUPlace
,
dst_place
),
dst_ptr
,
BOOST_GET_CONST
(
platform
::
CPUPlace
,
src_place
),
src_ptr
,
size
);
}
else
if
(
platform
::
is_ipu_place
(
src_place
)
&&
platform
::
is_ipu_place
(
dst_place
))
{
memory
::
Copy
(
BOOST_GET_CONST
(
platform
::
IPUPlace
,
dst_place
),
dst_ptr
,
BOOST_GET_CONST
(
platform
::
IPUPlace
,
src_place
),
src_ptr
,
size
);
}
#endif
#ifdef PADDLE_WITH_XPU
else
if
(
platform
::
is_xpu_place
(
src_place
)
&&
// NOLINT
platform
::
is_cpu_place
(
dst_place
))
{
...
...
@@ -386,17 +402,33 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
memory
::
Copy
(
BOOST_GET_CONST
(
platform
::
CPUPlace
,
dst_place
),
dst_ptr
,
BOOST_GET_CONST
(
platform
::
CPUPlace
,
src_place
),
src_ptr
,
size
);
}
#ifdef PADDLE_WITH_IPU
else
if
(
platform
::
is_ipu_place
(
src_place
)
&&
// NOLINT
platform
::
is_cpu_place
(
dst_place
))
{
memory
::
Copy
(
BOOST_GET_CONST
(
platform
::
CPUPlace
,
dst_place
),
dst_ptr
,
BOOST_GET_CONST
(
platform
::
IPUPlace
,
src_place
),
src_ptr
,
size
);
}
else
if
(
platform
::
is_cpu_place
(
src_place
)
&&
// NOLINT
platform
::
is_ipu_place
(
dst_place
))
{
memory
::
Copy
(
BOOST_GET_CONST
(
platform
::
IPUPlace
,
dst_place
),
dst_ptr
,
BOOST_GET_CONST
(
platform
::
CPUPlace
,
src_place
),
src_ptr
,
size
);
}
else
{
// NOLINT
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"Copy from %s to %s is not supported."
,
src_place
,
dst_place
));
}
#endif
#ifdef PADDLE_WITH_XPU
else
if
(
platform
::
is_xpu_place
(
src_place
)
&&
// NOLINT
platform
::
is_cpu_place
(
dst_place
))
{
memory
::
Copy
(
BOOST_GET_CONST
(
platform
::
CPUPlace
,
dst_place
),
dst_ptr
,
BOOST_GET_CONST
(
platform
::
XPUPlace
,
src_place
),
src_ptr
,
size
);
}
else
if
(
platform
::
is_cpu_place
(
src_place
)
&&
// NOLINT
platform
::
is_xpu_place
(
dst_place
))
{
}
else
if
(
platform
::
is_cpu_place
(
src_place
)
&&
// NOLINT
platform
::
is_xpu_place
(
dst_place
))
{
memory
::
Copy
(
BOOST_GET_CONST
(
platform
::
XPUPlace
,
dst_place
),
dst_ptr
,
BOOST_GET_CONST
(
platform
::
CPUPlace
,
src_place
),
src_ptr
,
size
);
}
else
if
(
platform
::
is_xpu_place
(
src_place
)
&&
// NOLINT
platform
::
is_xpu_place
(
dst_place
))
{
}
else
if
(
platform
::
is_xpu_place
(
src_place
)
&&
// NOLINT
platform
::
is_xpu_place
(
dst_place
))
{
if
(
src_ptr
==
dst_ptr
)
{
VLOG
(
3
)
<<
"Skip copy the same data async from "
<<
src_place
<<
" to "
<<
dst_place
;
...
...
@@ -404,7 +436,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
}
memory
::
Copy
(
BOOST_GET_CONST
(
platform
::
XPUPlace
,
dst_place
),
dst_ptr
,
BOOST_GET_CONST
(
platform
::
XPUPlace
,
src_place
),
src_ptr
,
size
);
}
else
{
// NOLINT
}
else
{
// NOLINT
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"Copy from %s to %s is not supported."
,
src_place
,
dst_place
));
}
...
...
@@ -571,6 +604,11 @@ class AnyVisitor : public boost::static_visitor<bool> {
platform
::
errors
::
Unimplemented
(
"Not supported on place (%s) "
,
npu
));
// return GetResultHelper(out, npu);
}
bool
GetResult
(
const
framework
::
Tensor
&
out
,
const
platform
::
IPUPlace
&
ipu
)
const
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"Not supported on place (%s) "
,
ipu
));
}
bool
GetResult
(
const
framework
::
Tensor
&
out
,
const
platform
::
NPUPinnedPlace
&
cpu
)
const
{
...
...
@@ -762,6 +800,9 @@ struct BothFalseVisitor : public boost::static_visitor<> {
void
VisitorImpl
(
const
platform
::
XPUPlace
&
xpu
)
const
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"XPUPlace is not supported"
));
}
void
VisitorImpl
(
const
platform
::
IPUPlace
&
ipu
)
const
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"IPUPlace is not supported"
));
}
void
VisitorImpl
(
const
platform
::
CUDAPlace
&
gpu
)
const
{
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
...
...
paddle/fluid/imperative/gradient_accumulator.cc
浏览文件 @
883ee1a3
...
...
@@ -155,6 +155,13 @@ class TensorAddFunctor : public boost::static_visitor<> {
"is not supported in imperative mode"
,
place
));
}
// there is NO support in IPUPlace
void
operator
()(
const
platform
::
IPUPlace
&
place
)
{
PADDLE_THROW
(
platform
::
errors
::
PermissionDenied
(
"Gradient accumulation on place (%s) "
"is not supported in imperative mode"
,
place
));
}
private:
int64_t
numel_
;
...
...
paddle/fluid/imperative/prepared_operator.cc
浏览文件 @
883ee1a3
...
...
@@ -487,6 +487,14 @@ static void PreparedOpRunImpl(
op
.
Type
(),
outs
,
dev_ctx
->
GetPlace
());
}
if
(
FLAGS_benchmark
)
{
dev_ctx
->
Wait
();
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PADDLE_ENFORCE_GPU_SUCCESS
(
platform
::
GpuGetLastError
());
VLOG
(
4
)
<<
"Operator("
<<
op
.
Type
()
<<
"): context wait and get last error"
;
#endif
}
/**
* [ Why need handle complex gradient to real gradient? ]
*
...
...
paddle/fluid/imperative/reducer.cc
浏览文件 @
883ee1a3
...
...
@@ -211,70 +211,6 @@ void SplitTensorsWithType<platform::XPUDeviceContext>(
}
#endif
// NOTE(liubo48): Only implement operators::math::SplitFunctor for npu now.
// If later the operators::StridedMemcpyWithAxis0 is supported,
// then this specific SplitTensorsForAllReduce can be removed.
#ifdef PADDLE_WITH_ASCEND_CL
template
<
>
void
SplitTensorsForAllReduce
<
platform
::
NPUDeviceContext
,
float
>
(
const
platform
::
NPUDeviceContext
&
context
,
framework
::
Variable
*
p_dense_contents
,
std
::
vector
<
framework
::
Tensor
>
*
p_dense_tensors
)
{
auto
*
in
=
p_dense_contents
->
GetMutable
<
framework
::
LoDTensor
>
();
std
::
vector
<
framework
::
Tensor
*>
outs
;
std
::
vector
<
const
framework
::
Tensor
*>
shape_refer
;
outs
.
reserve
(
p_dense_tensors
->
size
());
shape_refer
.
reserve
(
p_dense_tensors
->
size
());
for
(
auto
&
tensor
:
*
p_dense_tensors
)
{
outs
.
emplace_back
(
&
tensor
);
shape_refer
.
emplace_back
(
&
tensor
);
}
operators
::
math
::
SplitFunctor
<
platform
::
NPUDeviceContext
,
float
>
split_functor_
;
split_functor_
(
context
,
*
in
,
shape_refer
,
0
,
&
outs
);
}
template
<
>
void
ConcatTensorsWithType
<
platform
::
NPUDeviceContext
>
(
const
platform
::
NPUDeviceContext
&
context
,
const
std
::
vector
<
framework
::
Tensor
>
&
dense_tensors_
,
framework
::
Variable
*
p_dense_contents
,
framework
::
proto
::
VarType
::
Type
type
)
{
switch
(
type
)
{
case
framework
::
proto
::
VarType
::
FP32
:
ConcatTensorsForAllReduce
<
platform
::
NPUDeviceContext
,
float
>
(
context
,
dense_tensors_
,
p_dense_contents
);
break
;
default:
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"Data type (%s) is not supported when it concats tensors for "
"allreduce."
,
framework
::
DataTypeToString
(
type
)));
}
}
template
<
>
void
SplitTensorsWithType
<
platform
::
NPUDeviceContext
>
(
const
platform
::
NPUDeviceContext
&
context
,
framework
::
Variable
*
p_dense_contents
,
std
::
vector
<
framework
::
Tensor
>
*
p_dense_tensors
,
framework
::
proto
::
VarType
::
Type
type
)
{
switch
(
type
)
{
case
framework
::
proto
::
VarType
::
FP32
:
SplitTensorsForAllReduce
<
platform
::
NPUDeviceContext
,
float
>
(
context
,
p_dense_contents
,
p_dense_tensors
);
break
;
default:
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"Data type (%s) is not supported when it splits tensors for "
"allreduce."
,
framework
::
DataTypeToString
(
type
)));
}
}
#endif
void
Group
::
ConcatTensors
(
const
platform
::
DeviceContext
&
context
)
{
auto
place
=
context
.
GetPlace
();
if
(
platform
::
is_gpu_place
(
place
))
{
...
...
paddle/fluid/memory/allocation/allocator_facade.cc
浏览文件 @
883ee1a3
...
...
@@ -348,13 +348,14 @@ class AllocatorFacadePrivate {
const
AllocatorMap
&
GetAllocatorMap
()
{
#ifdef PADDLE_WITH_CUDA
if
(
UNLIKELY
(
platform
::
CUDAGraph
::
IsCapturing
()))
{
if
(
UNLIKELY
(
platform
::
CUDAGraph
::
Is
ThisThread
Capturing
()))
{
auto
id
=
platform
::
CUDAGraph
::
CapturingID
();
auto
iter
=
cuda_graph_allocator_map_
.
find
(
id
);
PADDLE_ENFORCE_NE
(
iter
,
cuda_graph_allocator_map_
.
end
(),
platform
::
errors
::
PermissionDenied
(
"No memory pool is prepared for CUDA Graph capturing."
));
VLOG
(
10
)
<<
"Choose CUDA Graph memory pool to allocate memory"
;
return
iter
->
second
->
allocators_
;
}
else
{
return
allocators_
;
...
...
@@ -405,7 +406,7 @@ class AllocatorFacadePrivate {
#if defined(PADDLE_WITH_HIP)
auto
cuda_allocator
=
std
::
make_shared
<
CUDAAllocator
>
(
p
);
cuda_allocators_
[
p
][
stream
]
=
std
::
make_shared
<
AutoGrowthBestFitAllocator
>
(
cuda_allocator
,
platform
::
GpuMinChunkSize
(),
allow_free_idle_chunk_
);
cuda_allocator
,
platform
::
GpuMinChunkSize
(),
0
,
allow_free_idle_chunk_
);
#endif
#if defined(PADDLE_WITH_CUDA)
...
...
paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
浏览文件 @
883ee1a3
...
...
@@ -116,6 +116,34 @@ size_t Used<platform::CPUPlace>(const platform::CPUPlace &place) {
return
GetCPUBuddyAllocator
()
->
Used
();
}
// For Graphcore IPU
template
<
>
void
*
Alloc
<
platform
::
IPUPlace
>
(
const
platform
::
IPUPlace
&
place
,
size_t
size
)
{
VLOG
(
10
)
<<
"Allocate "
<<
size
<<
" bytes on "
<<
platform
::
Place
(
place
);
VLOG
(
10
)
<<
"IPUPlace, Allocate on cpu."
;
void
*
p
=
GetCPUBuddyAllocator
()
->
Alloc
(
size
);
if
(
FLAGS_init_allocated_mem
)
{
memset
(
p
,
0xEF
,
size
);
}
VLOG
(
10
)
<<
" pointer="
<<
p
;
return
p
;
}
template
<
>
void
Free
<
platform
::
IPUPlace
>
(
const
platform
::
IPUPlace
&
place
,
void
*
p
,
size_t
size
)
{
VLOG
(
10
)
<<
"Free pointer="
<<
p
<<
" on "
<<
platform
::
Place
(
place
);
GetCPUBuddyAllocator
()
->
Free
(
p
);
}
template
<
>
uint64_t
Release
<
platform
::
IPUPlace
>
(
const
platform
::
IPUPlace
&
place
)
{
return
GetCPUBuddyAllocator
()
->
Release
();
}
template
<
>
size_t
Used
<
platform
::
IPUPlace
>
(
const
platform
::
IPUPlace
&
place
)
{
return
GetCPUBuddyAllocator
()
->
Used
();
}
// For kunlun XPU
template
<
>
void
*
Alloc
<
platform
::
XPUPlace
>
(
const
platform
::
XPUPlace
&
place
,
size_t
size
)
{
...
...
paddle/fluid/operators/cinn/cinn_launch_context.cc
浏览文件 @
883ee1a3
...
...
@@ -32,9 +32,34 @@ CinnLaunchContext::CinnLaunchContext(
[](
const
auto
&
name_view
)
{
return
std
::
string
(
name_view
.
data
());
});
}
bool
CinnLaunchContext
::
IsVariableUsed
(
const
std
::
string
&
paddle_name
)
{
return
paddle2cinn_varmap_
.
count
(
paddle_name
)
>
0
&&
cinn_variable_names_
.
count
(
paddle2cinn_varmap_
.
at
(
paddle_name
))
>
0
;
void
CinnLaunchContext
::
UpdateCapturedEnv
(
const
framework
::
Scope
&
scope
,
const
platform
::
Place
&
place
)
{
if
(
std
::
addressof
(
scope
)
==
cached_scope_
&&
std
::
addressof
(
place
)
==
cached_place_
)
{
VLOG
(
4
)
<<
"Captured scope:"
<<
cached_scope_
<<
", place:"
<<
cached_place_
<<
" are not changed"
;
return
;
}
cached_scope_
=
std
::
addressof
(
scope
);
cached_place_
=
std
::
addressof
(
place
);
cached_temp_scope_
=
scope
.
NewTmpScope
();
VLOG
(
4
)
<<
"Captured env is update, scope:"
<<
cached_scope_
<<
"->"
<<
std
::
addressof
(
scope
)
<<
", place:"
<<
cached_place_
<<
"->"
<<
std
::
addressof
(
place
);
}
bool
CinnLaunchContext
::
IsArgumentsInitialized
()
const
{
if
(
hold_buffers_
.
empty
()
||
name2argument_
.
empty
())
{
return
false
;
}
return
true
;
}
bool
CinnLaunchContext
::
IsVariableUsed
(
const
std
::
string
&
paddle_var_name
)
const
{
return
paddle2cinn_varmap_
.
count
(
paddle_var_name
)
>
0
&&
cinn_variable_names_
.
count
(
paddle2cinn_varmap_
.
at
(
paddle_var_name
))
>
0
;
}
CinnTensor
CinnLaunchContext
::
GetCinnTensor
(
const
std
::
string
&
var_name
)
{
...
...
@@ -53,99 +78,101 @@ std::unordered_set<std::string> CinnLaunchContext::GetInternalVariableNames() {
return
all_parameters
;
}
void
CinnLaunchContext
::
CheckTensorEquivalent
(
const
std
::
string
&
paddle_name
,
const
LoDTensor
&
paddle_tensor
,
const
CinnTensor
&
cinn_tensor
)
{
void
CinnLaunchContext
::
CheckTensorEquivalent
(
const
std
::
string
&
paddle_var_name
,
const
LoDTensor
&
paddle_tensor
,
const
CinnTensor
&
cinn_tensor
)
{
// check dimension
auto
cinn_dims
=
framework
::
make_ddim
(
cinn_tensor
->
shape
().
data
());
PADDLE_ENFORCE_EQ
(
paddle_tensor
.
dims
(),
cinn_dims
,
platform
::
errors
::
PreconditionNotMet
(
"Tensors' shape in variable(%s) are not equivalent, "
"paddle's shape = [%s], but cinn's shape = [%s]."
,
paddle_name
,
paddle_tensor
.
dims
(),
cinn_dims
));
paddle_
var_
name
,
paddle_tensor
.
dims
(),
cinn_dims
));
// TODO(CtfGo): check the underlying data type after CINN ready
}
void
CinnLaunchContext
::
AssignExternalVariable
(
const
std
::
string
&
paddle_name
,
const
platform
::
Place
&
place
,
LoDTensor
*
paddle_tensor
)
{
PADDLE_ENFORCE_EQ
(
IsVariableUsed
(
paddle_name
),
true
,
platform
::
errors
::
InvalidArgument
(
"Paddle variable(%s) not used by cinn"
,
paddle_name
));
const
auto
&
cinn_name
=
paddle2cinn_varmap_
.
at
(
paddle_name
);
CinnTensor
cinn_tensor
=
GetCinnTensor
(
cinn_name
);
if
(
!
paddle_tensor
->
IsInitialized
())
{
paddle_tensor
->
Resize
(
framework
::
make_ddim
(
cinn_tensor
->
shape
().
data
()));
void
CinnLaunchContext
::
AssignExternalVariable
(
const
std
::
string
&
paddle_var_name
)
{
PADDLE_ENFORCE_EQ
(
IsVariableUsed
(
paddle_var_name
),
true
,
platform
::
errors
::
InvalidArgument
(
"Paddle variable(%s) not used by cinn"
,
paddle_var_name
));
const
auto
&
cinn_var_name
=
paddle2cinn_varmap_
.
at
(
paddle_var_name
);
const
auto
&
paddle_tensor
=
cached_scope_
->
GetVar
(
paddle_var_name
)
->
Get
<
LoDTensor
>
();
CinnTensor
cinn_tensor
=
GetCinnTensor
(
cinn_var_name
);
if
(
paddle_tensor
.
IsInitialized
())
{
CheckTensorEquivalent
(
paddle_var_name
,
paddle_tensor
,
cinn_tensor
);
}
CheckTensorEquivalent
(
paddle_name
,
*
paddle_tensor
,
cinn_tensor
);
return
SetArgument
(
cinn_name
,
place
,
/* free_mem_callback = */
false
,
paddle_tensor
);
}
void
CinnLaunchContext
::
AssignInternalVariable
(
const
std
::
string
&
cinn_name
,
const
platform
::
Place
&
place
,
LoDTensor
*
paddle_tensor
)
{
PADDLE_ENFORCE_GT
(
cinn_variable_names_
.
count
(
cinn_name
),
0
,
platform
::
errors
::
InvalidArgument
(
"Variable(%s) not found in cinn socpe."
,
cinn_name
));
CinnTensor
cinn_tensor
=
GetCinnTensor
(
cinn_name
);
if
(
!
paddle_tensor
->
IsInitialized
())
{
paddle_tensor
->
Resize
(
framework
::
make_ddim
(
cinn_tensor
->
shape
().
data
()));
}
CheckTensorEquivalent
(
cinn_name
,
*
paddle_tensor
,
cinn_tensor
);
return
SetArgument
(
cinn_name
,
place
,
/* free_mem_callback = */
true
,
paddle_tensor
);
}
auto
cinn_buffer
=
std
::
make_unique
<
cinn_buffer_t
>
();
// assign dimensions and alloc/free callback of cinn_buffer_t
cinn_buffer
->
resize
(
cinn_tensor
->
shape
().
data
().
data
(),
cinn_tensor
->
shape
().
data
().
size
());
cinn_buffer
->
external_malloc
=
new
std
::
function
<
int
(
void
*
,
cinn_buffer_t
*
)
>
(
[
this
,
paddle_var_name
](
void
*
ctx
,
cinn_buffer_t
*
buffer
)
{
auto
*
tensor
=
cached_scope_
->
GetVar
(
paddle_var_name
)
->
GetMutable
<
LoDTensor
>
();
tensor
->
Resize
(
framework
::
DDim
(
buffer
->
dims
,
buffer
->
dimensions
));
buffer
->
memory
=
reinterpret_cast
<
uint8_t
*>
(
tensor
->
mutable_data
<
float
>
(
*
cached_place_
));
return
0
;
});
std
::
unique_ptr
<
cinn_buffer_t
>
CinnLaunchContext
::
ShareTensorWithCinnBuffer
(
const
platform
::
Place
&
place
,
bool
free_mem_callback
,
LoDTensor
*
tensor
)
{
// convert paddle dimensions array to cinn format
std
::
vector
<
cinn_dimension_t
>
cinn_dims
(
tensor
->
dims
().
size
());
for
(
auto
i
=
0
;
i
<
tensor
->
dims
().
size
();
++
i
)
{
cinn_dims
[
i
]
=
static_cast
<
cinn_dimension_t
>
(
tensor
->
dims
().
at
(
i
));
}
// external variables will be recycled by global gc, so do nothing here
cinn_buffer
->
external_free
=
new
std
::
function
<
int
(
void
*
,
cinn_buffer_t
*
)
>
(
[](
void
*
ctx
,
cinn_buffer_t
*
buffer
)
{
// Do nothing
return
0
;
});
return
SetArgument
(
cinn_var_name
,
std
::
move
(
cinn_buffer
));
}
void
CinnLaunchContext
::
AssignInternalVariable
(
const
std
::
string
&
cinn_var_name
)
{
PADDLE_ENFORCE_GT
(
cinn_variable_names_
.
count
(
cinn_var_name
),
0
,
platform
::
errors
::
InvalidArgument
(
"Variable(%s) not found in cinn socpe."
,
cinn_var_name
));
CinnTensor
cinn_tensor
=
GetCinnTensor
(
cinn_var_name
);
auto
cinn_buffer
=
std
::
make_unique
<
cinn_buffer_t
>
();
// assign size and memory
cinn_buffer
->
resize
(
cinn_dims
.
data
(),
cinn_dims
.
size
());
// assign dimensions and alloc/free callback of cinn_buffer_t
cinn_buffer
->
resize
(
cinn_tensor
->
shape
().
data
().
data
(),
cinn_tensor
->
shape
().
data
().
size
());
cinn_buffer
->
external_malloc
=
new
std
::
function
<
int
(
void
*
,
cinn_buffer_t
*
)
>
(
[
place
,
tensor
](
void
*
ctx
,
cinn_buffer_t
*
buffer
)
{
buffer
->
memory
=
reinterpret_cast
<
uint8_t
*>
(
tensor
->
mutable_data
<
float
>
(
place
));
[
this
,
cinn_var_name
](
void
*
ctx
,
cinn_buffer_t
*
buffer
)
{
auto
*
tensor
=
cached_temp_scope_
->
Var
(
cinn_var_name
)
->
GetMutable
<
LoDTensor
>
();
tensor
->
Resize
(
framework
::
DDim
(
buffer
->
dims
,
buffer
->
dimensions
));
buffer
->
memory
=
reinterpret_cast
<
uint8_t
*>
(
tensor
->
mutable_data
<
float
>
(
*
cached_place_
));
return
0
;
});
if
(
free_mem_callback
)
{
cinn_buffer
->
external_free
=
new
std
::
function
<
int
(
void
*
,
cinn_buffer_t
*
)
>
(
[
tensor
](
void
*
ctx
,
cinn_buffer_t
*
buffer
)
{
tensor
->
clear
();
return
0
;
});
return
cinn_buffer
;
}
// internal variables should release its buffer immediately
// if no instruction use it
cinn_buffer
->
external_free
=
new
std
::
function
<
int
(
void
*
,
cinn_buffer_t
*
)
>
(
[](
void
*
ctx
,
cinn_buffer_t
*
buffer
)
{
// Do nothing
[
this
,
cinn_var_name
](
void
*
ctx
,
cinn_buffer_t
*
buffer
)
{
auto
*
tensor
=
cached_temp_scope_
->
GetVar
(
cinn_var_name
)
->
GetMutable
<
LoDTensor
>
();
tensor
->
clear
();
return
0
;
});
return
cinn_buffer
;
return
SetArgument
(
cinn_var_name
,
std
::
move
(
cinn_buffer
))
;
}
void
CinnLaunchContext
::
SetArgument
(
const
std
::
string
&
cinn_name
,
const
platform
::
Place
&
place
,
bool
free_mem_callback
,
LoDTensor
*
paddle_tensor
)
{
auto
buffer
=
ShareTensorWithCinnBuffer
(
place
,
free_mem_callback
,
paddle_tensor
);
name2argument_
.
emplace
(
cinn_name
,
buffer
.
get
());
void
CinnLaunchContext
::
SetArgument
(
const
std
::
string
&
cinn_
var_
name
,
std
::
unique_ptr
<
cinn_buffer_t
>&&
buffer
)
{
VLOG
(
4
)
<<
"SetArgument-"
<<
name2argument_
.
size
()
<<
": name("
<<
cinn_var_name
<<
"), dims("
<<
framework
::
DDim
(
buffer
->
dims
,
buffer
->
dimensions
)
<<
")."
;
name2argument_
.
emplace
(
cinn_
var_
name
,
buffer
.
get
());
hold_buffers_
.
emplace_back
(
std
::
move
(
buffer
));
VLOG
(
4
)
<<
"SetArgument-"
<<
name2argument_
.
size
()
<<
": "
<<
"name("
<<
cinn_name
<<
"), dims("
<<
paddle_tensor
->
dims
()
<<
")."
;
}
const
std
::
map
<
std
::
string
,
cinn_pod_value_t
>&
...
...
paddle/fluid/operators/cinn/cinn_launch_context.h
浏览文件 @
883ee1a3
...
...
@@ -24,7 +24,7 @@
#include "cinn/runtime/cinn_runtime.h"
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/
platform/plac
e.h"
#include "paddle/fluid/
framework/scop
e.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -40,16 +40,22 @@ class CinnLaunchContext {
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>&
paddle2cinn_varmap
,
const
std
::
shared_ptr
<
CinnScope
>&
cinn_scope
);
// explicitly update several environment variables captured
// by callback of execution arguments
void
UpdateCapturedEnv
(
const
framework
::
Scope
&
scope
,
const
platform
::
Place
&
place
);
// Return whether execution arguments has been initialized
bool
IsArgumentsInitialized
()
const
;
// Return whether a Paddle variable used on compiled kernels
bool
IsVariableUsed
(
const
std
::
string
&
var_name
)
;
bool
IsVariableUsed
(
const
std
::
string
&
paddle_var_name
)
const
;
// Assign tensor buffer to input or output variables
void
AssignExternalVariable
(
const
std
::
string
&
var_name
,
const
platform
::
Place
&
place
,
LoDTensor
*
tensor
);
void
AssignExternalVariable
(
const
std
::
string
&
paddle_var_name
);
// Assign tensor buffer to internal variables
void
AssignInternalVariable
(
const
std
::
string
&
var_name
,
const
platform
::
Place
&
place
,
LoDTensor
*
tensor
);
void
AssignInternalVariable
(
const
std
::
string
&
cinn_var_name
);
// Extract internal variable names from CinnScope
// by excluding used input and output variables
...
...
@@ -58,10 +64,6 @@ class CinnLaunchContext {
// Finalize all execution arguments and return them
const
std
::
map
<
std
::
string
,
cinn_pod_value_t
>&
FinalizeArguments
()
const
;
std
::
vector
<
std
::
unique_ptr
<
cinn_buffer_t
>>
HandoverBuffers
()
{
return
std
::
move
(
hold_buffers_
);
}
private:
// Get CinnTensor with CINN variable name
CinnTensor
GetCinnTensor
(
const
std
::
string
&
var_name
);
...
...
@@ -72,16 +74,15 @@ class CinnLaunchContext {
const
LoDTensor
&
paddle_tensor
,
const
CinnTensor
&
cinn_tensor
);
// Share the buffer of a Paddle tensor to CINN by delivering memory address
// to a cinn_buffer_t object
std
::
unique_ptr
<
cinn_buffer_t
>
ShareTensorWithCinnBuffer
(
const
platform
::
Place
&
place
,
bool
free_mem_callback
,
LoDTensor
*
tensor
);
// Set an argument with (cinn name)->(paddle tensor) pair
void
SetArgument
(
const
std
::
string
&
cinn_name
,
const
platform
::
Place
&
place
,
bool
free_mem_callback
,
LoDTensor
*
paddle_tensor
);
// Set an argument with (cinn name)->(cinn_buffer_t) pair
void
SetArgument
(
const
std
::
string
&
cinn_var_name
,
std
::
unique_ptr
<
cinn_buffer_t
>&&
buffer
);
private:
const
framework
::
Scope
*
cached_scope_
=
nullptr
;
const
platform
::
Place
*
cached_place_
=
nullptr
;
std
::
unique_ptr
<
framework
::
Scope
>
cached_temp_scope_
=
nullptr
;
// a variable name map from paddle to cinn
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>&
paddle2cinn_varmap_
;
// the variable scope of cinn
...
...
paddle/fluid/operators/cinn/cinn_launch_context_test.cc
浏览文件 @
883ee1a3
...
...
@@ -45,81 +45,86 @@ std::unique_ptr<CinnLaunchContext> CreateDefaultLaunchContext() {
return
std
::
make_unique
<
CinnLaunchContext
>
(
paddle2cinn_varmap
,
cinn_scope
);
}
TEST
(
CinnLaunchContextTest
,
Test
IsVariableUsed
)
{
TEST
(
CinnLaunchContextTest
,
Test
Basic
)
{
auto
launch_context
=
CreateDefaultLaunchContext
();
// test IsVariableUsed
ASSERT_EQ
(
launch_context
->
IsVariableUsed
(
"var1"
),
true
);
ASSERT_EQ
(
launch_context
->
IsVariableUsed
(
"var4"
),
false
);
}
TEST
(
CinnLaunchContextTest
,
TestGetInternalVariableNames
)
{
auto
launch_context
=
CreateDefaultLaunchContext
();
auto
internal_variable_names
=
launch_context
->
GetInternalVariableNames
();
ASSERT_EQ
(
internal_variable_names
.
size
(),
3
);
EXPECT_NE
(
internal_variable_names
.
find
(
"cinn_var2"
),
internal_variable_names
.
end
());
// test UpdateCapturedEnv
platform
::
CPUPlace
place
;
framework
::
Scope
scope
;
ASSERT_NO_THROW
(
launch_context
->
UpdateCapturedEnv
(
scope
,
place
));
// test IsArgumentsInitialized
ASSERT_FALSE
(
launch_context
->
IsArgumentsInitialized
());
}
TEST
(
CinnLaunchContextTest
,
TestCheckTensorEquivalent
)
{
auto
launch_context
=
CreateDefaultLaunchContext
();
platform
::
CPUPlace
place
;
framework
::
Scope
scope
;
auto
launch_context
=
CreateDefaultLaunchContext
();
launch_context
->
UpdateCapturedEnv
(
scope
,
place
);
auto
*
tensor1
=
scope
.
Var
(
"var1"
)
->
GetMutable
<
LoDTensor
>
();
// CheckTensorEquivalent: tensor dimension not equivalent
tensor1
->
mutable_data
<
float
>
(
framework
::
make_ddim
({
3
,
5
}),
place
);
ASSERT_THROW
(
launch_context
->
AssignExternalVariable
(
"var1"
,
place
,
tensor1
),
ASSERT_THROW
(
launch_context
->
AssignExternalVariable
(
"var1"
),
paddle
::
platform
::
EnforceNotMet
);
}
TEST
(
CinnLaunchContextTest
,
TestAssignVariablePreCondition
)
{
auto
launch_context
=
CreateDefaultLaunchContext
();
platform
::
CPUPlace
place
;
framework
::
Scope
scope
;
auto
launch_context
=
CreateDefaultLaunchContext
();
launch_context
->
UpdateCapturedEnv
(
scope
,
place
);
auto
*
tensor4
=
scope
.
Var
(
"var4"
)
->
GetMutable
<
LoDTensor
>
();
// not used
ASSERT_THROW
(
launch_context
->
AssignExternalVariable
(
"var4"
,
place
,
tensor4
),
ASSERT_THROW
(
launch_context
->
AssignExternalVariable
(
"var4"
),
paddle
::
platform
::
EnforceNotMet
);
// not found
ASSERT_THROW
(
launch_context
->
AssignExternalVariable
(
"cinn_var4"
,
place
,
tensor4
),
paddle
::
platform
::
EnforceNotMet
);
ASSERT_THROW
(
launch_context
->
AssignInternalVariable
(
"cinn_var4"
),
paddle
::
platform
::
EnforceNotMet
);
}
TEST
(
CinnLaunchContextTest
,
TestSetArgument
)
{
platform
::
CPUPlace
cpu_place
;
platform
::
Place
place
(
cpu_place
);
framework
::
Scope
scope
;
auto
launch_context
=
CreateDefaultLaunchContext
();
launch_context
->
UpdateCapturedEnv
(
scope
,
place
);
platform
::
CPUPlace
place
;
framework
::
Scope
scope
;
// assign external variables
auto
*
tensor1
=
scope
.
Var
(
"var1"
)
->
GetMutable
<
LoDTensor
>
();
float
*
data1
=
tensor1
->
mutable_data
<
float
>
(
framework
::
make_ddim
({
3
,
4
}),
place
);
data1
[
0
]
=
9.99
f
;
data1
[
10
]
=
19.99
f
;
ASSERT_NO_THROW
(
launch_context
->
AssignExternalVariable
(
"var1"
));
// assign external variable
ASSERT_NO_THROW
(
launch_context
->
AssignExternalVariable
(
"var1"
,
place
,
tensor1
));
auto
*
tensor2
=
scope
.
Var
(
"var2"
)
->
GetMutable
<
LoDTensor
>
();
tensor2
->
mutable_data
<
float
>
(
framework
::
make_ddim
({
6
,
7
,
8
}),
place
);
ASSERT_NO_THROW
(
launch_context
->
AssignInternalVariable
(
"cinn_var2"
,
place
,
tensor2
));
// FinalizeArguments not missed check
ASSERT_THROW
(
launch_context
->
FinalizeArguments
(),
paddle
::
platform
::
EnforceNotMet
);
auto
*
tensor3
=
scope
.
Var
(
"var3"
)
->
GetMutable
<
LoDTensor
>
();
tensor3
->
mutable_data
<
float
>
(
framework
::
make_ddim
({
10
,
16
}),
place
);
ASSERT_NO_THROW
(
launch_context
->
AssignExternalVariable
(
"var3"
,
place
,
tensor3
));
ASSERT_NO_THROW
(
launch_context
->
AssignExternalVariable
(
"var3"
));
// FinalizeArguments missed check
ASSERT_THROW
(
launch_context
->
FinalizeArguments
(),
paddle
::
platform
::
EnforceNotMet
);
// test get internal variables
auto
internal_variable_names
=
launch_context
->
GetInternalVariableNames
();
ASSERT_EQ
(
internal_variable_names
.
size
(),
1
);
EXPECT_EQ
(
*
internal_variable_names
.
begin
(),
"cinn_var2"
);
auto
*
tensor2
=
scope
.
Var
(
"var2"
)
->
GetMutable
<
LoDTensor
>
();
tensor2
->
mutable_data
<
float
>
(
framework
::
make_ddim
({
6
,
7
,
8
}),
place
);
ASSERT_NO_THROW
(
launch_context
->
AssignInternalVariable
(
"cinn_var2"
));
// check argument is set correctly and alloc/free callbacks work well
auto
name2argument
=
launch_context
->
FinalizeArguments
();
ASSERT_EQ
(
name2argument
.
size
(),
3
);
ASSERT_EQ
(
name2argument
.
count
(
"cinn_var1"
),
1
);
// check ShareTensorWithCinnBuffer
ASSERT_TRUE
(
launch_context
->
IsArgumentsInitialized
());
auto
*
cinn_buffer
=
static_cast
<
cinn_buffer_t
*>
(
name2argument
.
at
(
"cinn_var1"
));
ASSERT_EQ
(
cinn_buffer
->
memory
,
nullptr
);
cinn_buffer
->
external_malloc
->
operator
()(
nullptr
,
cinn_buffer
);
ASSERT_NE
(
cinn_buffer
->
memory
,
nullptr
);
...
...
paddle/fluid/operators/cinn/cinn_launch_op.cu.cc
浏览文件 @
883ee1a3
...
...
@@ -31,26 +31,6 @@ namespace operators {
namespace
details
{
#ifdef PADDLE_WITH_CUDA
void
CUDART_CB
ReleaseScope
(
void
*
data
)
{
auto
*
temp_scope
=
static_cast
<
framework
::
Scope
*>
(
data
);
delete
temp_scope
;
}
void
CUDART_CB
ReleaseBuffers
(
void
*
data
)
{
auto
*
buffers
=
static_cast
<
std
::
vector
<
std
::
unique_ptr
<
cinn_buffer_t
>>*>
(
data
);
delete
buffers
;
}
template
<
>
void
ReleaseResource
<
platform
::
CUDADeviceContext
>
(
const
std
::
vector
<
void
*>&
resources
,
void
*
stream
)
{
PADDLE_ENFORCE_GPU_SUCCESS
(
cudaLaunchHostFunc
(
static_cast
<
gpuStream_t
>
(
stream
),
ReleaseScope
,
resources
[
0
]));
PADDLE_ENFORCE_GPU_SUCCESS
(
cudaLaunchHostFunc
(
static_cast
<
gpuStream_t
>
(
stream
),
ReleaseBuffers
,
resources
[
1
]));
}
template
<
>
void
*
GetStream
<
platform
::
CUDADeviceContext
>
(
const
framework
::
ExecutionContext
&
ctx
)
{
...
...
paddle/fluid/operators/cinn/cinn_launch_op.h
浏览文件 @
883ee1a3
...
...
@@ -56,25 +56,12 @@ void LaunchCinnExecution(const CinnCompiledObject& compiled_obj,
// Set cinn FLAGS (such as FLAGS_cinn_cudnn_deterministic) with paddle's FLAGS.
void
SetCinnRuntimeFlags
();
template
<
typename
DeviceContext
>
void
ReleaseResource
(
const
std
::
vector
<
void
*>&
resources
,
void
*
stream
)
{
auto
*
temp_scope
=
static_cast
<
framework
::
Scope
*>
(
resources
[
0
]);
auto
*
buffers
=
static_cast
<
std
::
vector
<
std
::
unique_ptr
<
cinn_buffer_t
>>*>
(
resources
[
1
]);
delete
temp_scope
;
delete
buffers
;
}
template
<
typename
DeviceContext
>
void
*
GetStream
(
const
framework
::
ExecutionContext
&
ctx
)
{
return
nullptr
;
}
#ifdef PADDLE_WITH_CUDA
template
<
>
void
ReleaseResource
<
platform
::
CUDADeviceContext
>
(
const
std
::
vector
<
void
*>&
resources
,
void
*
stream
);
template
<
>
void
*
GetStream
<
platform
::
CUDADeviceContext
>
(
const
framework
::
ExecutionContext
&
ctx
);
...
...
@@ -116,56 +103,54 @@ class CinnLaunchOpKernel : public framework::OpKernel<T> {
compilation_key
,
inputs_name2tensor
,
target
,
stream
);
details
::
DebugCinnCompiledResult
(
cinn_compiled_object
);
auto
launch_context
=
std
::
make_unique
<
details
::
CinnLaunchContext
>
(
cinn_compiled_object
.
paddle2cinn_varmap
,
cinn_compiled_object
.
scope
);
auto
*
launch_context
=
cinn_compiled_object
.
launch_context
.
get
();
// Step 3. Prepare arguments needed for the compiled executable program.
VLOG
(
4
)
<<
"CinnLaunchOp prepare arguments"
;
// 3.1 Prepare input variables: tensors of input variables have
// been initialized before graph compiled, just check the
// equiality between tensors of paddle and cinn.
for
(
const
auto
&
var_name
:
input_variable_names
)
{
if
(
!
launch_context
->
IsVariableUsed
(
var_name
))
{
// some input variables don't need for cinn because they are
// eliminated by optimized passes or some cinn operators use
// less variables
VLOG
(
4
)
<<
"Input variable("
<<
var_name
<<
") not used by cinn"
;
continue
;
launch_context
->
UpdateCapturedEnv
(
scope
,
place
);
if
(
!
launch_context
->
IsArgumentsInitialized
())
{
VLOG
(
4
)
<<
"CinnLaunchOp prepare arguments"
;
// 3.1 Prepare input variables: tensors of input variables have
// been initialized before graph compiled, just check the
// equiality between tensors of paddle and cinn.
for
(
const
auto
&
var_name
:
input_variable_names
)
{
if
(
!
launch_context
->
IsVariableUsed
(
var_name
))
{
// some input variables don't need for cinn because they are
// eliminated by optimized passes or some cinn operators use
// less variables
VLOG
(
4
)
<<
"Input variable("
<<
var_name
<<
") not used by cinn"
;
continue
;
}
launch_context
->
AssignExternalVariable
(
var_name
);
}
launch_context
->
AssignExternalVariable
(
var_name
,
place
,
scope
.
GetVar
(
var_name
)
->
GetMutable
<
LoDTensor
>
());
}
// 3.2 Prepare output variables: all output variables should
// be initialized and allocated buffer before
// the runtime program start execution, the compilation result
// includes details of their buffer assginment and we use that to
// allocate space in Paddle. For those variables allocated yet,
// like persistable parameters, just check the equiality between
// Paddle allocation and CINN buffer assginment.
auto
output_variable_names
=
ctx
.
OutputNames
(
kOutputs
);
for
(
const
auto
var_name
:
output_variable_names
)
{
PADDLE_ENFORCE_EQ
(
launch_context
->
IsVariableUsed
(
var_name
),
true
,
platform
::
errors
::
InvalidArgument
(
"Output variable(%s) not used by cinn"
,
var_name
));
auto
*
tensor
=
scope
.
GetVar
(
var_name
)
->
GetMutable
<
LoDTensor
>
();
launch_context
->
AssignExternalVariable
(
var_name
,
place
,
tensor
);
}
// 3.2 Prepare output variables: all output variables should
// be initialized and allocated buffer before
// the runtime program start execution, the compilation result
// includes details of their buffer assginment and we use that to
// allocate space in Paddle. For those variables allocated yet,
// like persistable parameters, just check the equiality between
// Paddle allocation and CINN buffer assginment.
auto
output_variable_names
=
ctx
.
OutputNames
(
kOutputs
);
for
(
const
auto
var_name
:
output_variable_names
)
{
PADDLE_ENFORCE_EQ
(
launch_context
->
IsVariableUsed
(
var_name
),
true
,
platform
::
errors
::
InvalidArgument
(
"Output variable(%s) not used by cinn"
,
var_name
));
launch_context
->
AssignExternalVariable
(
var_name
);
}
// 3.3 Prepare internal or temporary variables: Create a temporary
// scope to keep internal variables within graph or temporary
// variables needed by the compiled runtime program in addition.
// Here we directly use the names from CinnScope as Paddle variable
// names, because they will not be used outside the graph
// and should be destructed after computation finished.
auto
internal_variable_names
=
launch_context
->
GetInternalVariableNames
();
framework
::
Scope
*
temp_scope
=
scope
.
NewTmpScope
().
release
();
for
(
const
auto
&
var_name
:
internal_variable_names
)
{
auto
*
tensor
=
temp_scope
->
Var
(
var_name
)
->
GetMutable
<
LoDTensor
>
();
launch_context
->
AssignInternalVariable
(
var_name
,
place
,
tensor
);
// 3.3 Prepare internal or temporary variables: Create a temporary
// scope to keep internal variables within graph or temporary
// variables needed by the compiled runtime program in addition.
// Here we directly use the names from CinnScope as Paddle variable
// names, because they will not be used outside the graph
// and should be destructed after computation finished.
auto
internal_variable_names
=
launch_context
->
GetInternalVariableNames
();
for
(
const
auto
&
var_name
:
internal_variable_names
)
{
launch_context
->
AssignInternalVariable
(
var_name
);
}
}
// Step 4. Set CINN runtime FLAGS, such as FLAGS_cinn_cudnn_deterministic.
...
...
@@ -175,12 +160,6 @@ class CinnLaunchOpKernel : public framework::OpKernel<T> {
VLOG
(
4
)
<<
"Run Cinn compiled executable program with stream: "
<<
stream
;
details
::
LaunchCinnExecution
(
cinn_compiled_object
,
*
launch_context
,
stream
);
VLOG
(
4
)
<<
"CinnLaunchOp launch execution done."
;
// Step 6. Release some resources, such as `temp_scope` and cinn_buffers.
auto
*
buffers_holder
=
new
std
::
vector
<
std
::
unique_ptr
<
cinn_buffer_t
>>
{
launch_context
->
HandoverBuffers
()};
details
::
ReleaseResource
<
DeviceContext
>
({
temp_scope
,
buffers_holder
},
stream
);
}
};
...
...
paddle/fluid/operators/cinn/cinn_launch_op_test.cc
浏览文件 @
883ee1a3
...
...
@@ -130,8 +130,9 @@ TEST(CinnLaunchOpTest, TestElementwiseAddPass) {
scope
.
Var
(
test_out_name
)
->
GetMutable
<
LoDTensor
>
();
scope
.
Var
(
expected_out_name
)
->
GetMutable
<
LoDTensor
>
();
cinn_launch_op
->
Run
(
scope
,
place
);
elementwise_add_op
->
Run
(
scope
,
place
);
platform
::
Place
run_place
(
place
);
cinn_launch_op
->
Run
(
scope
,
run_place
);
elementwise_add_op
->
Run
(
scope
,
run_place
);
LoDTensor
test_out
,
expected_out
;
TensorCopySync
(
scope
.
Var
(
test_out_name
)
->
Get
<
LoDTensor
>
(),
...
...
paddle/fluid/operators/complex_view_op.cc
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/complex_view_op.h"
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
operators
{
class
AsComplexOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
OP_INOUT_CHECK
(
ctx
->
HasInput
(
"X"
),
"Input"
,
"X"
,
"as_complex"
);
OP_INOUT_CHECK
(
ctx
->
HasOutput
(
"Out"
),
"Output"
,
"Out"
,
"as_complex"
);
auto
in_dims
=
ctx
->
GetInputDim
(
"X"
);
const
int
input_rank
=
in_dims
.
size
();
PADDLE_ENFORCE_GE
(
input_rank
,
1
,
platform
::
errors
::
InvalidArgument
(
"The rank of input(X) is less than 1. "
"Expected the rank of input(X) to be equal to or greater than 1."
"But received rank of input(X) = %d"
,
input_rank
));
const
int
last_dim_size
=
in_dims
[
input_rank
-
1
];
PADDLE_ENFORCE_EQ
(
last_dim_size
,
2
,
platform
::
errors
::
InvalidArgument
(
"The size of the last dimension of input(X)"
"does not equals 2."
"Expected the size of last dimension of input(X) to be 2."
"But received %d"
,
last_dim_size
));
const
framework
::
DDim
out_dims
(
in_dims
.
Get
(),
input_rank
-
1
);
ctx
->
SetOutputDim
(
"Out"
,
out_dims
);
ctx
->
ShareLoD
(
"X"
,
/*->*/
"Out"
);
}
};
class
AsComplexOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
void
Make
()
override
{
AddInput
(
"X"
,
"(Tensor), The input tensor of view_as_complex op."
);
AddOutput
(
"Out"
,
"(Tensor), The output tensor of view_as_complex op."
);
AddComment
(
R"DOC(
As_complex Operator.
This operator is used to return a complex tensor represented
by an old-fashioned real tensor. The size of the last dimension of
the input tensor should be 2, which corresponds to 'real' and
'complex', respectively.
)DOC"
);
}
};
template <typename T>
class AsComplexGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  // The backward of as_complex is as_real: the complex-valued gradient of
  // Out is viewed back as a real tensor with a trailing dimension of 2,
  // which becomes the gradient of X.
  void Apply(GradOpPtr<T> retv) const override {
    retv->SetType("as_real");
    retv->SetInput("X", this->OutputGrad("Out"));
    retv->SetAttrMap(this->Attrs());
    retv->SetOutput("Out", this->InputGrad("X"));
  }
};
class AsRealOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Out's shape is X's shape with an extra trailing dimension of size 2:
  // every complex element expands into a (real, imaginary) pair.
  void InferShape(framework::InferShapeContext *ctx) const override {
    OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "as_real");
    OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "as_real");

    auto out_dims_v = framework::vectorize(ctx->GetInputDim("X"));
    out_dims_v.push_back(2);
    const framework::DDim out_dims = framework::make_ddim(out_dims_v);
    ctx->SetOutputDim("Out", out_dims);
    ctx->ShareLoD("X", /*->*/ "Out");
  }

 protected:
  // The kernel runs on the *real* counterpart of the complex input dtype
  // (e.g. complex64 -> float), hence the ToRealType conversion.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext &ctx) const override {
    auto input_data_type =
        framework::OperatorWithKernel::IndicateVarDataType(ctx, "X");
    auto real_type = framework::ToRealType(input_data_type);
    return framework::OpKernelType(real_type, ctx.GetPlace());
  }
};
class AsRealOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  // Declares the single-input/single-output proto for as_real.
  // Fix over original: the op documentation said the last dimension
  // corresponds to "'real' and 'complex'"; the two components of a complex
  // number are the real and *imaginary* parts.
  void Make() override {
    AddInput("X", "(Tensor), The input tensor of as_real op.");
    AddOutput("Out", "(Tensor), The output tensor of as_real op.");
    AddComment(R"DOC(
AsReal Operator.

This operator is used to return an old-fashioned real tensor from a
complex tensor. The size of the last dimension of the output tensor is 2,
which corresponds to 'real' and 'imaginary', respectively.

)DOC");
  }
};
template <typename T>
class AsRealGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  // The backward of as_real is as_complex: the real-valued gradient of Out
  // (with its trailing dimension of 2) is viewed as a complex tensor, which
  // becomes the gradient of X.
  void Apply(GradOpPtr<T> retv) const override {
    retv->SetType("as_complex");
    retv->SetInput("X", this->OutputGrad("Out"));
    retv->SetAttrMap(this->Attrs());
    retv->SetOutput("Out", this->InputGrad("X"));
  }
};
}
// namespace operators
}
// namespace paddle
namespace ops = paddle::operators;

// Register the forward ops together with their grad-op makers for both the
// static graph (OpDesc) and dygraph (OpBase) modes. Note the ops are each
// other's gradients: as_complex's grad maker emits as_real and vice versa.
REGISTER_OPERATOR(as_complex, ops::AsComplexOp, ops::AsComplexOpMaker,
                  ops::AsComplexGradMaker<paddle::framework::OpDesc>,
                  ops::AsComplexGradMaker<paddle::imperative::OpBase>);

REGISTER_OPERATOR(as_real, ops::AsRealOp, ops::AsRealOpMaker,
                  ops::AsRealGradMaker<paddle::framework::OpDesc>,
                  ops::AsRealGradMaker<paddle::imperative::OpBase>);

// CPU kernels are instantiated for the two real element types; the complex
// counterparts (complex64/complex128) are derived inside the kernels.
REGISTER_OP_CPU_KERNEL(
    as_complex,
    ops::AsComplexKernel<paddle::platform::CPUDeviceContext, float>,
    ops::AsComplexKernel<paddle::platform::CPUDeviceContext, double>);

REGISTER_OP_CPU_KERNEL(
    as_real, ops::AsRealKernel<paddle::platform::CPUDeviceContext, float>,
    ops::AsRealKernel<paddle::platform::CPUDeviceContext, double>);
paddle/fluid/operators/complex_view_op.cu
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/complex_view_op.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/platform/enforce.h"
namespace ops = paddle::operators;

// CUDA registrations mirror the CPU ones in complex_view_op.cc: the kernels
// themselves are device-agnostic (header-only TensorCopy-based views), so
// only the DeviceContext template argument differs.
REGISTER_OP_CUDA_KERNEL(
    as_complex,
    ops::AsComplexKernel<paddle::platform::CUDADeviceContext, float>,
    ops::AsComplexKernel<paddle::platform::CUDADeviceContext, double>);

REGISTER_OP_CUDA_KERNEL(
    as_real, ops::AsRealKernel<paddle::platform::CUDADeviceContext, float>,
    ops::AsRealKernel<paddle::platform::CUDADeviceContext, double>);
paddle/fluid/operators/complex_view_op.h
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/math/complex_functors.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/platform/for_range.h"
namespace
paddle
{
namespace
operators
{
// Views a real tensor with trailing dimension 2 as a complex tensor by
// copying the raw bytes and reinterpreting them. The statement order below
// is deliberate and load-bearing: TensorCopy overwrites both the shape and
// the dtype of `out` with those of `x`, so shape and dtype are captured /
// re-established around it.
template <typename DeviceContext, typename T>
class AsComplexKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    const auto* x = context.Input<framework::LoDTensor>("X");
    auto* out = context.Output<framework::LoDTensor>("Out");

    // Allocate Out as complex<T>; its dims were set by InferShape to the
    // input dims minus the trailing dimension of size 2.
    out->mutable_data<platform::complex<T>>(context.GetPlace());

    // TensorCopy also changes output's shape & dtype
    const framework::DDim out_dims_original = out->dims();
    framework::TensorCopy(*x, context.GetPlace(), out);
    out->Resize(out_dims_original);  // restored the shape
    out->mutable_data<platform::complex<T>>(
        context.GetPlace());  // restore the dtype
  }
};
// Views a complex tensor as a real tensor with an extra trailing dimension
// of size 2, again via a byte-level TensorCopy. As in AsComplexKernel, the
// copy clobbers `out`'s shape and dtype with the input's, so both are
// captured beforehand and restored afterwards; do not reorder these calls.
template <typename DeviceContext, typename T>
class AsRealKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    const auto* x = context.Input<framework::LoDTensor>("X");
    auto* out = context.Output<framework::LoDTensor>("Out");

    // Allocate Out as real T; its dims were set by InferShape to the input
    // dims plus a trailing dimension of size 2.
    out->mutable_data<T>(context.GetPlace());

    const framework::DDim out_dims_original = out->dims();
    framework::TensorCopy(*x, context.GetPlace(), out);
    out->Resize(out_dims_original);  // restored the shape
    out->mutable_data<T>(context.GetPlace());  // restore the dtype
  }
};
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/ipu_runtime_op.cc
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/ipu_runtime_op.h"
namespace
paddle
{
namespace
operators
{
class IpuRuntimeOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Output shapes come from the PopART runtime at execution time, so there
  // is nothing to infer statically.
  void InferShape(framework::InferShapeContext* ctx) const override {}

 protected:
  // The kernel dtype is taken from the "dtype" attribute rather than from
  // an input variable, since the inputs form a duplicable feed list.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    auto dtype = framework::proto::VarType::Type(ctx.Attr<int>("dtype"));
    return framework::OpKernelType(dtype, ctx.device_context());
  }
};
class IpuRuntimeOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  // ipu_runtime feeds a list of tensors to the PopART runtime and fetches a
  // list of results; both ends are duplicable (variable arity).
  void Make() override {
    AddInput("FeedList", "FeedList of Graph").AsDuplicable();
    AddOutput("FetchList", "FetchList of Graph").AsDuplicable();
    AddAttr<int>("dtype",
                 "(int, default 5 (FP32)) "
                 "Output data type")
        .SetDefault(framework::proto::VarType::FP32);
    AddComment(R"DOC(
Run graph by PopART runtime.
)DOC");
  }
};
}
// namespace operators
}
// namespace paddle
namespace ops = paddle::operators;

REGISTER_OPERATOR(ipu_runtime, ops::IpuRuntimeOp, ops::IpuRuntimeOpMaker);

// The IPU kernel is registered for every dtype the feed/fetch lists may
// carry; the kernel body is dtype-agnostic (it forwards whole tensors to
// the PopART backend).
REGISTER_OP_IPU_KERNEL(ipu_runtime, ops::IpuRuntimeKernel<float>,
                       ops::IpuRuntimeKernel<double>,
                       ops::IpuRuntimeKernel<int>,
                       ops::IpuRuntimeKernel<int64_t>,
                       ops::IpuRuntimeKernel<bool>,
                       ops::IpuRuntimeKernel<int8_t>,
                       ops::IpuRuntimeKernel<paddle::platform::float16>);
paddle/fluid/operators/ipu_runtime_op.h
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#ifdef PADDLE_WITH_IPU
#include "paddle/fluid/framework/ipu/ipu_backend.h"
#include "paddle/fluid/framework/tensor.h"
#endif
namespace
paddle
{
namespace
operators
{
// Executes a compiled graph on the IPU via the PopART backend singleton.
// Only meaningful when built WITH_IPU; otherwise Compute() throws.
template <typename T>
class IpuRuntimeKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
#ifdef PADDLE_WITH_IPU
    // Lazily attach the IPU device on first use.
    auto ipu_backend = framework::ipu::IpuBackend::GetInstance();
    if (!ipu_backend->DeviceIsAttached()) {
      const platform::IPUDeviceContext& ipu_ctx =
          reinterpret_cast<const platform::IPUDeviceContext&>(
              ctx.device_context());
      ipu_backend->AttachDevice(ipu_ctx.DeviceId());
    }

    auto inputs = ctx.MultiInput<framework::Tensor>("FeedList");
    auto outputs = ctx.MultiOutput<framework::Tensor>("FetchList");
    auto output_names = ctx.OutputNames("FetchList");
    VLOG(4) << "IpuRuntime Kernel, begin to run graph";
    ipu_backend->Run(inputs, outputs, ctx);

    // post-run
    // resize tensor when tensor.dims() is empty: the backend may leave an
    // output with rank 0, so reconstruct a 1-D dim from the allocated
    // byte count divided by the element size.
    for (size_t i = 0; i < outputs.size(); ++i) {
      auto* out = outputs[i];
      if (out->dims().size() == 0) {
        auto tensor_dtype = out->type();
        auto sizeof_dtype = framework::SizeOfType(tensor_dtype);
        int64_t dim = out->memory_size() / sizeof_dtype;
        out->Resize({dim});
        VLOG(10) << "set ipu_runtime_op output: " << output_names[i]
                 << " dims from () to: "
                 << "(" << dim << ")";
      }
    }
#else
    PADDLE_THROW(platform::errors::PreconditionNotMet(
        "Please compile WITH_IPU option to enable ipu_runtime op"));
#endif
  }
};
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/math/math_function.cc
浏览文件 @
883ee1a3
...
...
@@ -173,6 +173,13 @@ void set_constant_with_place<platform::NPUPinnedPlace>(
platform
::
errors
::
Unimplemented
(
"NPUPinnedPlace is not supported"
));
}
template
<
>
void
set_constant_with_place
<
platform
::
IPUPlace
>
(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
*
tensor
,
float
value
)
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"IPUPlace is not supported"
));
}
template
<
>
void
set_constant_with_place
<
platform
::
CPUPlace
>
(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
*
tensor
,
...
...
paddle/fluid/operators/py_layer_op.h
浏览文件 @
883ee1a3
...
...
@@ -54,7 +54,7 @@ class PyLayerOp : public framework::OperatorWithKernel {
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
data_type
=
OperatorWithKernel
::
IndicateVarDataType
(
ctx
,
"X"
)
;
auto
data_type
=
paddle
::
framework
::
proto
::
VarType
::
Type
::
VarType_Type_FP32
;
return
framework
::
OpKernelType
(
data_type
,
ctx
.
device_context
());
}
...
...
paddle/fluid/operators/reshape_op.cc
浏览文件 @
883ee1a3
...
...
@@ -555,10 +555,10 @@ class Reshape2Op : public ReshapeOp {
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
multi_inputs
=
ctx
.
MultiInput
<
framework
::
Tensor
>
(
"ShapeTensor"
);
if
(
multi_inputs
.
size
()
>
0
)
{
return
framework
::
KernelSignature
(
"reshape
.
mulhost"
,
{
"X"
,
"ShapeTensor"
},
return
framework
::
KernelSignature
(
"reshape
_
mulhost"
,
{
"X"
,
"ShapeTensor"
},
{},
{
"Out"
});
}
else
if
(
ctx
.
HasInput
(
"Shape"
))
{
return
framework
::
KernelSignature
(
"reshape
.
host"
,
{
"X"
,
"Shape"
},
{},
return
framework
::
KernelSignature
(
"reshape
_
host"
,
{
"X"
,
"Shape"
},
{},
{
"Out"
});
}
else
{
return
framework
::
KernelSignature
(
"reshape"
,
{
"X"
},
{
"shape"
},
{
"Out"
});
...
...
paddle/fluid/platform/device/gpu/cuda/cuda_graph.cc
浏览文件 @
883ee1a3
...
...
@@ -18,6 +18,7 @@ namespace paddle {
namespace
platform
{
std
::
unique_ptr
<
CUDAGraph
>
CUDAGraph
::
capturing_graph_
{
nullptr
};
paddle
::
optional
<
std
::
thread
::
id
>
CUDAGraph
::
capturing_thread_id_
{
paddle
::
none
};
void
CUDAGraph
::
Reset
()
{
if
(
is_reset_
)
return
;
...
...
@@ -58,6 +59,13 @@ void CUDAGraph::BeginSegmentCapture() {
IsCapturing
(),
true
,
errors
::
PermissionDenied
(
"BeginSegmentCapture should be called when CUDA "
"Graph is capturing."
));
if
(
IsThreadLocalCapturing
())
{
PADDLE_ENFORCE_EQ
(
IsThisThreadCapturing
(),
true
,
platform
::
errors
::
PermissionDenied
(
"When capturing CUDA Graph in the thread local mode, "
"you cannot begin segmented capturing in the thread "
"which is not the one that starts the capturing."
));
}
PADDLE_ENFORCE_GPU_SUCCESS
(
cudaStreamBeginCapture
(
capturing_graph_
->
stream_
,
capturing_graph_
->
capture_mode_
));
PADDLE_ENFORCE_EQ
(
IsValidCapturing
(),
true
,
...
...
@@ -82,6 +90,11 @@ void CUDAGraph::BeginCapture(platform::CUDAPlace place, cudaStream_t stream,
capturing_graph_
->
place_
=
place
;
capturing_graph_
->
stream_
=
stream
;
capturing_graph_
->
capture_mode_
=
mode
;
if
(
mode
==
cudaStreamCaptureModeThreadLocal
)
{
capturing_thread_id_
=
std
::
this_thread
::
get_id
();
VLOG
(
10
)
<<
"Capturing CUDA Graph in thread local mode, thread id: "
<<
capturing_thread_id_
;
}
BeginSegmentCapture
();
#endif
}
...
...
@@ -115,6 +128,7 @@ void CUDAGraph::EndSegmentCapture() {
std
::
unique_ptr
<
CUDAGraph
>
CUDAGraph
::
EndCapture
()
{
EndSegmentCapture
();
capturing_thread_id_
=
paddle
::
none
;
return
std
::
move
(
capturing_graph_
);
}
...
...
paddle/fluid/platform/device/gpu/cuda/cuda_graph.h
浏览文件 @
883ee1a3
...
...
@@ -18,6 +18,7 @@
#include <functional>
#include <memory>
#include <mutex>
#include <thread>
#include <vector>
#include "cuda.h" // NOLINT
#include "cuda_runtime.h" // NOLINT
...
...
@@ -26,6 +27,7 @@
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/utils/optional.h"
namespace
paddle
{
namespace
platform
{
...
...
@@ -99,6 +101,25 @@ class CUDAGraph {
// supported during capturing CUDA Graph.
static
bool
IsValidCapturing
();
static
bool
IsThreadLocalCapturing
()
{
#if CUDA_VERSION >= 10010
return
IsCapturing
()
&&
capturing_graph_
->
capture_mode_
==
cudaStreamCaptureModeThreadLocal
;
#else
return
false
;
#endif
}
static
bool
IsThisThreadCapturing
()
{
if
(
UNLIKELY
(
IsCapturing
()))
{
return
IsThreadLocalCapturing
()
?
capturing_thread_id_
.
get
()
==
std
::
this_thread
::
get_id
()
:
true
;
}
else
{
return
false
;
}
}
private:
static
CUDAGraphID
UniqueID
()
{
static
std
::
atomic
<
CUDAGraphID
>
id
;
...
...
@@ -118,6 +139,7 @@ class CUDAGraph {
bool
is_reset_
{
false
};
std
::
mutex
mtx_
;
static
paddle
::
optional
<
std
::
thread
::
id
>
capturing_thread_id_
;
static
std
::
unique_ptr
<
CUDAGraph
>
capturing_graph_
;
};
...
...
paddle/fluid/platform/device/gpu/gpu_primitives.h
浏览文件 @
883ee1a3
...
...
@@ -101,6 +101,20 @@ inline static __device__ uint32_t add_to_high_half(uint32_t val, float x) {
return
(
val
&
0xFFFFu
)
|
(
static_cast
<
uint32_t
>
(
high_half
.
x
)
<<
16
);
}
#if CUDA_VERSION >= 10000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
static
__device__
__forceinline__
float16
CUDAFP16ToPDFP16
(
__half
x
)
{
return
*
reinterpret_cast
<
float16
*>
(
&
x
);
}
static
__device__
__forceinline__
__half
PDFP16ToCUDAFP16
(
float16
x
)
{
return
*
reinterpret_cast
<
__half
*>
(
&
x
);
}
CUDA_ATOMIC_WRAPPER
(
Add
,
float16
)
{
return
CUDAFP16ToPDFP16
(
atomicAdd
(
reinterpret_cast
<
__half
*>
(
address
),
PDFP16ToCUDAFP16
(
val
)));
}
#else
CUDA_ATOMIC_WRAPPER
(
Add
,
float16
)
{
// concrete packed float16 value may exsits in lower or higher 16bits
// of the 32bits address.
...
...
@@ -133,6 +147,7 @@ CUDA_ATOMIC_WRAPPER(Add, float16) {
}
}
#endif
#endif
CUDA_ATOMIC_WRAPPER
(
Add
,
complex
<
float
>
)
{
float
*
real
=
reinterpret_cast
<
float
*>
(
address
);
...
...
paddle/fluid/platform/device/ipu/CMakeLists.txt
浏览文件 @
883ee1a3
# IPU
IF
(
WITH_IPU
)
FILE
(
GLOB POPART_CANONICALIZATION_SRC
${
PADDLE_SOURCE_DIR
}
/paddle/fluid/platform/device/ipu/popart_canonicalization/*.cc
)
cc_library
(
ipu_device SRCS device.cc DEPS enforce popart
)
cc_library
(
ipu_utils SRCS ipu_utils.cc DEPS memory framework_proto popart
)
cc_library
(
ipu_strategy SRCS ipu_strategy.cc DEPS popart graph framework_proto enforce
)
...
...
paddle/fluid/platform/device/ipu/popart_canonicalization/activation_ops.cc
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h"
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
platform
{
namespace
ipu
{
namespace
{
// Shared lowering for unary activations: emit the single-input popart op
// named `type`, wired to the node's "X" input and its original outputs.
Node *activation_op_handler(Graph *graph, Node *node,
                            const std::string &type) {
  return CreateBaseOp(graph, node, type, {GetInputVarNode("X", node)},
                      node->outputs);
}

Node *relu_handler(Graph *graph, Node *node) {
  return activation_op_handler(graph, node, "popart_relu");
}

Node *tanh_handler(Graph *graph, Node *node) {
  return activation_op_handler(graph, node, "popart_tanh");
}

Node *log_handler(Graph *graph, Node *node) {
  return activation_op_handler(graph, node, "popart_log");
}

Node *sigmoid_handler(Graph *graph, Node *node) {
  return activation_op_handler(graph, node, "popart_sigmoid");
}

Node *sqrt_handler(Graph *graph, Node *node) {
  return activation_op_handler(graph, node, "popart_sqrt");
}

Node *gelu_handler(Graph *graph, Node *node) {
  return activation_op_handler(graph, node, "popart_gelu_v2");
}

// log_softmax has no single popart equivalent: lower it as an opset-11
// softmax over `axis` followed by popart_log.
Node *log_softmax_handler(Graph *graph, Node *node) {
  auto axis = BOOST_GET_CONST(int, node->Op()->GetAttr("axis"));
  auto new_softmax = CreateSoftmaxOpset11(graph, node, node->inputs, {}, axis);
  return CreateBaseOp(graph, node, "popart_log", new_softmax->outputs,
                      node->outputs);
}

REGISTER_HANDLER(relu, relu_handler);
REGISTER_HANDLER(tanh, tanh_handler);
REGISTER_HANDLER(log, log_handler);
REGISTER_HANDLER(sigmoid, sigmoid_handler);
REGISTER_HANDLER(sqrt, sqrt_handler);
REGISTER_HANDLER(gelu, gelu_handler);
REGISTER_HANDLER(log_softmax, log_softmax_handler);
}
// namespace
}
// namespace ipu
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.cc
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h"
namespace
paddle
{
namespace
platform
{
namespace
ipu
{
// This avoids the static initialisation order fiasco,
std
::
unordered_map
<
std
::
string
,
SymbolHandler
>
&
SymbolHandlers
()
{
static
std
::
unordered_map
<
std
::
string
,
SymbolHandler
>
symbol_handlers
;
return
symbol_handlers
;
}
bool
RegisterHandler
(
const
std
::
string
&
symbol
,
const
SymbolHandler
&
handler
)
{
if
(
SymbolHandlers
().
count
(
symbol
)
!=
0
)
{
LOG
(
WARNING
)
<<
"Trying to register popart handler twice for operator: "
<<
symbol
;
return
false
;
}
bool
new_handler
=
SymbolHandlers
().
emplace
(
symbol
,
handler
).
second
;
return
new_handler
;
}
// Return a pointer to a handler if one is registered for this kind of node or
// an empty std::function otherwise.
SymbolHandler
GetHandler
(
const
std
::
string
&
kind
)
{
auto
it
=
SymbolHandlers
().
find
(
kind
);
if
(
it
!=
SymbolHandlers
().
end
())
{
return
it
->
second
;
}
return
{};
}
// Adds a directed edge first_node -> next_node by appending to each node's
// adjacency lists.
void ConnectNodes(Node *first_node, Node *next_node) {
  first_node->outputs.push_back(next_node);
  next_node->inputs.push_back(first_node);
}
// Removes every edge between the two nodes, in both directions, by erasing
// each node from the other's inputs and outputs (erase-remove idiom).
void DisConnectNodes(Node *first_node, Node *next_node) {
  auto remove_all = [](std::vector<Node *> &vec, Node *n) {
    vec.erase(std::remove(vec.begin(), vec.end(), n), vec.end());
  };
  remove_all(first_node->outputs, next_node);
  remove_all(next_node->inputs, first_node);
  remove_all(first_node->inputs, next_node);
  remove_all(next_node->outputs, first_node);
}
// Detaches `node` from the graph: every neighbour's adjacency list is
// scrubbed of `node`. Note `node`'s own inputs/outputs vectors are left
// intact; only references *to* it are removed.
void ClearNode(Node *node) {
  auto erase_node_from = [node](std::vector<Node *> &vec) {
    vec.erase(std::remove(vec.begin(), vec.end(), node), vec.end());
  };
  for (auto *node_in : node->inputs) {
    erase_node_from(node_in->outputs);
  }
  for (auto *node_out : node->outputs) {
    erase_node_from(node_out->inputs);
  }
}
// Copies attribute `attr_name` from `op` to `new_op` if `op` has it.
// An attribute already present on `new_op` is only overwritten when
// `override` is true. (`override` is a contextual keyword, so it is a
// legal parameter name and kept for interface compatibility.)
void CopyOpAttr(const std::string &attr_name, OpDesc *op, OpDesc *new_op,
                bool override) {
  if (new_op->HasAttr(attr_name) && !override) {
    return;
  }
  if (!op->HasAttr(attr_name)) {
    return;
  }
  VLOG(10) << "Copying attr: " << attr_name << " from " << op->Type()
           << " to " << new_op->Type();
  new_op->SetAttr(attr_name, op->GetAttr(attr_name));
  new_op->Flush();
}
const
int
VarType2OnnxDtype
(
const
int
type
)
{
auto
dtype
=
static_cast
<
framework
::
proto
::
VarType
::
Type
>
(
type
);
switch
(
dtype
)
{
case
framework
::
proto
::
VarType
::
BOOL
:
return
static_cast
<
int
>
(
ONNXDataType
::
BOOL
);
case
framework
::
proto
::
VarType
::
INT16
:
return
static_cast
<
int
>
(
ONNXDataType
::
INT16
);
case
framework
::
proto
::
VarType
::
INT32
:
return
static_cast
<
int
>
(
ONNXDataType
::
INT32
);
case
framework
::
proto
::
VarType
::
INT64
:
return
static_cast
<
int
>
(
ONNXDataType
::
INT64
);
case
framework
::
proto
::
VarType
::
FP16
:
return
static_cast
<
int
>
(
ONNXDataType
::
FLOAT16
);
case
framework
::
proto
::
VarType
::
FP32
:
return
static_cast
<
int
>
(
ONNXDataType
::
FLOAT
);
case
framework
::
proto
::
VarType
::
FP64
:
return
static_cast
<
int
>
(
ONNXDataType
::
DOUBLE
);
case
framework
::
proto
::
VarType
::
UINT8
:
return
static_cast
<
int
>
(
ONNXDataType
::
UINT8
);
case
framework
::
proto
::
VarType
::
INT8
:
return
static_cast
<
int
>
(
ONNXDataType
::
INT8
);
case
framework
::
proto
::
VarType
::
BF16
:
return
static_cast
<
int
>
(
ONNXDataType
::
BFLOAT16
);
case
framework
::
proto
::
VarType
::
COMPLEX64
:
return
static_cast
<
int
>
(
ONNXDataType
::
COMPLEX64
);
case
framework
::
proto
::
VarType
::
COMPLEX128
:
return
static_cast
<
int
>
(
ONNXDataType
::
COMPLEX128
);
default:
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"Unsupported data type: %d."
,
dtype
));
}
}
const
std
::
string
VarType2PopStr
(
const
int
type
)
{
auto
dtype
=
static_cast
<
framework
::
proto
::
VarType
::
Type
>
(
type
);
switch
(
dtype
)
{
case
framework
::
proto
::
VarType
::
UINT8
:
return
"UINT8"
;
case
framework
::
proto
::
VarType
::
INT8
:
return
"INT8"
;
case
framework
::
proto
::
VarType
::
INT16
:
return
"INT16"
;
case
framework
::
proto
::
VarType
::
INT32
:
return
"INT32"
;
case
framework
::
proto
::
VarType
::
INT64
:
return
"INT64"
;
case
framework
::
proto
::
VarType
::
BOOL
:
return
"BOOL"
;
case
framework
::
proto
::
VarType
::
FP64
:
return
"DOUBLE"
;
case
framework
::
proto
::
VarType
::
FP32
:
return
"FLOAT"
;
case
framework
::
proto
::
VarType
::
FP16
:
return
"FLOAT16"
;
default:
PADDLE_THROW
(
paddle
::
platform
::
errors
::
Unavailable
(
"Unsupported data type."
));
}
}
// Resolves the id-th variable of op input slot `input_name` to the matching
// var node among op_node's input nodes. Throws (via vector::at) if `id` is
// out of range for the slot.
Node *GetInputVarNode(const std::string &input_name, const Node *op_node,
                      const int id) {
  const auto &var_name = op_node->Op()->Input(input_name).at(id);
  return GetInputVarNodeByVarName(var_name, op_node);
}

// Resolves the id-th variable of op output slot `output_name` to the
// matching var node among op_node's output nodes.
Node *GetOutputVarNode(const std::string &output_name, const Node *op_node,
                       const int id) {
  const auto &var_name = op_node->Op()->Output(output_name).at(id);
  return GetOutputVarNodeByVarName(var_name, op_node);
}
// Linear scan of op_node's input nodes for one whose name matches
// `var_name`; returns nullptr when absent.
Node *GetInputVarNodeByVarName(const std::string &var_name,
                               const Node *op_node) {
  for (auto *candidate : op_node->inputs) {
    if (candidate->Name() == var_name) {
      return candidate;
    }
  }
  return nullptr;
}

// Linear scan of op_node's output nodes for one whose name matches
// `var_name`; returns nullptr when absent.
Node *GetOutputVarNodeByVarName(const std::string &var_name,
                                const Node *op_node) {
  for (auto *candidate : op_node->outputs) {
    if (candidate->Name() == var_name) {
      return candidate;
    }
  }
  return nullptr;
}
// Absolute-tolerance float comparison: true when |a - b| <= eps.
// (Kept as |diff| <= eps, not !(|diff| > eps), to preserve NaN behaviour.)
const bool is_float_equal(float a, float b, float eps) {
  const float diff = std::fabs(a - b);
  return diff <= eps;
}
// namespace ipu
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/platform/device/ipu/ipu_utils.h"
namespace
paddle
{
namespace
platform
{
namespace
ipu
{
using framework::ir::Graph;
using framework::ir::Node;
using framework::OpDesc;

// Registers `func` as the canonicalization handler for Paddle op `name` at
// static-initialization time (the dummy bool forces the call).
#define REGISTER_HANDLER(name, func) \
  static bool __UNUSED_##name =      \
      paddle::platform::ipu::RegisterHandler(#name, func)

// A handler rewrites one Paddle op node into popart node(s) and returns the
// newly created node.
using SymbolHandler = std::function<Node *(Graph *, Node *)>;

// Accessor for the global op-name -> handler registry (function-local
// static, avoiding static-init order issues).
std::unordered_map<std::string, SymbolHandler> &SymbolHandlers();

// Adds a handler for an op symbol; returns false if one already exists.
bool RegisterHandler(const std::string &, const SymbolHandler &);

// Looks up a handler by op symbol; empty std::function when unregistered.
SymbolHandler GetHandler(const std::string &);

// Graph-edge maintenance helpers (see canonicalization_utils.cc).
void ConnectNodes(Node *first_node, Node *next_node);
void DisConnectNodes(Node *first_node, Node *next_node);
void ClearNode(Node *node);
// Copies one attribute between op descs; existing attrs on new_op are kept
// unless `override` is true.
void CopyOpAttr(const std::string &attr_name, OpDesc *op, OpDesc *new_op,
                bool override = false);

// Dtype conversions: Paddle VarType (as int) -> ONNX dtype / PopART name.
const int VarType2OnnxDtype(const int type);
const std::string VarType2PopStr(const int type);

// Var-node lookups relative to an op node (nullptr when not found).
Node *GetInputVarNode(const std::string &input_name, const Node *op_node,
                      const int id = 0);
Node *GetOutputVarNode(const std::string &output_name, const Node *op_node,
                       const int id = 0);
Node *GetInputVarNodeByVarName(const std::string &var_name,
                               const Node *op_node);
Node *GetOutputVarNodeByVarName(const std::string &var_name,
                                const Node *op_node);

// Absolute-tolerance float comparison: |a - b| <= eps.
const bool is_float_equal(float a, float b, float eps = 1e-8);
}
// namespace ipu
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/device/ipu/popart_canonicalization/elementwise_ops.cc
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h"
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
platform
{
namespace
ipu
{
namespace
{
// Lowers a Paddle binary elementwise op to the popart op `type`.
// When the operands already line up for popart's numpy-style broadcasting
// (axis == -1, axis addresses the trailing dims, or equal ranks), the op is
// emitted directly. Otherwise Y is first reshaped to X's rank by padding
// with 1s so that Y's dims occupy positions [axis, axis + y_rank).
Node *elementwise_op_handler(Graph *graph, Node *node,
                             const std::string &type) {
  auto *op = node->Op();
  auto x_shape = GetInputVarNode("X", node)->Var()->GetShape();
  int64_t x_rank = x_shape.size();
  auto y_shape = GetInputVarNode("Y", node)->Var()->GetShape();
  int64_t y_rank = y_shape.size();

  auto axis = BOOST_GET_CONST(int, op->GetAttr("axis"));
  if (axis == -1 || axis == x_rank - 1 || x_rank == y_rank) {
    // Shapes are already broadcast-compatible; emit the op as-is.
    auto new_node =
        CreateBaseOp(graph, node, type,
                     {GetInputVarNode("X", node), GetInputVarNode("Y", node)},
                     node->outputs);
    return new_node;
  } else {
    // Build Y's padded target shape: all 1s except Y's dims starting at
    // `axis` (Paddle's explicit-axis broadcast semantics).
    auto y_new_shape = std::vector<int64_t>(x_rank, 1);
    for (int i = axis; i < axis + y_rank; ++i) {
      y_new_shape[i] = y_shape[i - axis];
    }
    auto attrs = AttributeMap{
        {"value", y_new_shape},
        {"dims", std::vector<int64_t>{x_rank}},
        {"dtype", ONNXDataType::INT64},
    };
    // constant: the target shape fed to popart_reshape.
    auto new_node_const = CreateConst(graph, node, {}, {}, attrs);
    // reshape: Y padded to rank x_rank.
    auto new_node_reshape = CreateBaseOp(
        graph, node, "popart_reshape",
        {GetInputVarNode("Y", node), new_node_const->outputs[0]}, {});
    // elementwise_op on X and the reshaped Y.
    auto new_node = CreateBaseOp(
        graph, node, type,
        {GetInputVarNode("X", node), new_node_reshape->outputs[0]},
        node->outputs);
    return new_node;
  }
}
// Thin per-op wrappers: each forwards to elementwise_op_handler with the
// popart op name that corresponds to the paddle elementwise op.
Node *elementwise_add_handler(Graph *graph, Node *node) {
  return elementwise_op_handler(graph, node, "popart_add");
}

Node *elementwise_sub_handler(Graph *graph, Node *node) {
  return elementwise_op_handler(graph, node, "popart_sub");
}

Node *elementwise_div_handler(Graph *graph, Node *node) {
  return elementwise_op_handler(graph, node, "popart_div");
}

Node *elementwise_mul_handler(Graph *graph, Node *node) {
  return elementwise_op_handler(graph, node, "popart_mul");
}

Node *elementwise_min_handler(Graph *graph, Node *node) {
  return elementwise_op_handler(graph, node, "popart_min");
}

Node *elementwise_max_handler(Graph *graph, Node *node) {
  return elementwise_op_handler(graph, node, "popart_max");
}

Node *elementwise_pow_handler(Graph *graph, Node *node) {
  return elementwise_op_handler(graph, node, "popart_pow");
}

Node *elementwise_mod_handler(Graph *graph, Node *node) {
  return elementwise_op_handler(graph, node, "popart_mod");
}

REGISTER_HANDLER(elementwise_add, elementwise_add_handler);
REGISTER_HANDLER(elementwise_sub, elementwise_sub_handler);
REGISTER_HANDLER(elementwise_div, elementwise_div_handler);
REGISTER_HANDLER(elementwise_mul, elementwise_mul_handler);
REGISTER_HANDLER(elementwise_min, elementwise_min_handler);
REGISTER_HANDLER(elementwise_max, elementwise_max_handler);
REGISTER_HANDLER(elementwise_pow, elementwise_pow_handler);
REGISTER_HANDLER(elementwise_mod, elementwise_mod_handler);
}
// namespace
}
// namespace ipu
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/device/ipu/popart_canonicalization/logic_ops.cc
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h"
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
platform
{
namespace
ipu
{
namespace
{
// Maps paddle's `equal` op directly onto popart_equal with the same
// X/Y inputs and the original output list.
Node *equal_handler(Graph *graph, Node *node) {
  return CreateBaseOp(
      graph, node, "popart_equal",
      {GetInputVarNode("X", node), GetInputVarNode("Y", node)},
      node->outputs);
}

REGISTER_HANDLER(equal, equal_handler);
}
// namespace
}
// namespace ipu
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/device/ipu/popart_canonicalization/math_ops.cc
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h"
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
platform
{
namespace
ipu
{
namespace
{
// Maps paddle's `mean` (mean over all elements) onto popart_reducemean
// with keepdims disabled, so the result is a scalar.
Node *mean_handler(Graph *graph, Node *node) {
  auto x_in = GetInputVarNode("X", node);
  auto out = GetOutputVarNode("Out", node);
  return CreateBaseOp(graph, node, "popart_reducemean", {x_in}, {out},
                      {
                          {"keepdims", int64_t{0}},
                      });
}
// Lowers paddle `pow`. If a runtime FactorTensor input is wired up it is
// used as the exponent directly; otherwise the static `factor` attribute
// is materialized as a 1-element popart constant.
Node *pow_handler(Graph *graph, Node *node) {
  auto *op = node->Op();
  const bool has_factor_tensor =
      op->HasInput("FactorTensor") && !op->Input("FactorTensor").empty();
  if (has_factor_tensor) {
    return CreateBaseOp(
        graph, node, "popart_pow",
        {GetInputVarNode("X", node), GetInputVarNode("FactorTensor", node)},
        node->outputs);
  }
  // Op(pow) -> Op(Constant)->Var(const_out)->Op(Pow)
  auto factor = BOOST_GET_CONST(float, op->GetAttr("factor"));
  auto factor_attrs =
      MakeConstAttrMapFromValue<float>(factor, {1}, ONNXDataType::FLOAT);
  auto factor_const = CreateConst(graph, node, {}, {}, factor_attrs);
  return CreateBaseOp(
      graph, node, "popart_pow",
      {GetInputVarNode("X", node), factor_const->outputs[0]},
      node->outputs);
}
// Lowers paddle `mul` (the flatten-then-matmul op).
// X is flattened at x_num_col_dims and Y at y_num_col_dims, a popart
// matmul is applied, and the result is reshaped back to the output shape
// formed by X's leading dims followed by Y's trailing dims.
// Fix: the trailing-dims loop used `int` against y_shape_.size() (size_t),
// a signed/unsigned comparison; it now iterates with size_t.
Node *mul_handler(Graph *graph, Node *node) {
  auto *op = node->Op();
  auto x_num_col_dims = BOOST_GET_CONST(int, op->GetAttr("x_num_col_dims"));
  auto y_num_col_dims = BOOST_GET_CONST(int, op->GetAttr("y_num_col_dims"));
  auto x_shape_ = GetInputVarNode("X", node)->Var()->GetShape();
  auto y_shape_ = GetInputVarNode("Y", node)->Var()->GetShape();
  // Build the final output shape for the trailing reshape:
  // x_shape_[0:x_num_col_dims] ++ y_shape_[y_num_col_dims:].
  std::vector<int64_t> reshape_shape_{};
  for (int left = 0; left < x_num_col_dims; left++) {
    reshape_shape_.push_back(int64_t(x_shape_[left]));
  }
  for (size_t right = static_cast<size_t>(y_num_col_dims);
       right < y_shape_.size(); right++) {
    reshape_shape_.push_back(int64_t(y_shape_[right]));
  }
  // Flatten both operands to 2-D so a plain matmul applies.
  auto x_flatten = CreateBaseOp(graph, node, "popart_flatten",
                                {GetInputVarNode("X", node)}, {},
                                {{"axis", int64_t(x_num_col_dims)}});
  auto y_flatten = CreateBaseOp(graph, node, "popart_flatten",
                                {GetInputVarNode("Y", node)}, {},
                                {{"axis", int64_t(y_num_col_dims)}});
  auto matmul = CreateBaseOp(graph, node, "popart_matmul",
                             {x_flatten->outputs[0], y_flatten->outputs[0]},
                             {}, {});
  // Constant carrying the target shape, then reshape to node->outputs.
  auto reshape_const = CreateConst(
      graph, node, {}, {},
      {{"value", reshape_shape_},
       {"dims", std::vector<int64_t>{int64_t(reshape_shape_.size())}},
       {"dtype", ONNXDataType::INT64}});
  return CreateBaseOp(graph, node, "popart_reshape",
                      {matmul->outputs[0], reshape_const->outputs[0]},
                      node->outputs, {});
}
// Lowers paddle `matmul`. Paddle expresses transposition and scaling as
// attributes; popart does not, so transposes become explicit
// popart_transpose nodes and a non-unit alpha becomes an explicit
// constant + popart_mul.
// Fix: the alpha test was inverted — the original emitted the scaling
// multiply only when alpha == 1.0 (a no-op) and silently DROPPED alpha
// whenever it differed from 1.0, producing wrong results. The scaling
// now happens exactly when alpha != 1.0.
// Also removes the unused local y_shape.
Node *matmul_handler(Graph *graph, Node *node) {
  auto *op = node->Op();
  auto transpose_x = BOOST_GET_CONST(bool, op->GetAttr("transpose_X"));
  auto transpose_y = BOOST_GET_CONST(bool, op->GetAttr("transpose_Y"));
  auto alpha = BOOST_GET_CONST(float, op->GetAttr("alpha"));
  auto x_shape = GetInputVarNode("X", node)->Var()->GetShape();
  int x_rank = x_shape.size();

  // Pick the permutation that swaps the two trailing dims (rank 3/4);
  // rank 2 goes through gemm, which takes transA/transB directly.
  std::vector<int64_t> perm;
  if (x_rank == 1) {
    perm = std::vector<int64_t>{0};
  } else if (x_rank == 2) {
    return CreateGemm(
        graph, node,
        {GetInputVarNode("X", node), GetInputVarNode("Y", node)},
        node->outputs, transpose_x, transpose_y, alpha);
  } else if (x_rank == 3) {
    perm = std::vector<int64_t>{0, 2, 1};
  } else if (x_rank == 4) {
    perm = std::vector<int64_t>{0, 1, 3, 2};
  } else {
    PADDLE_THROW(platform::errors::InvalidArgument(
        "op matmul with input rank == %d", x_rank));
  }

  Node *x_node = GetInputVarNode("X", node);
  Node *y_node = GetInputVarNode("Y", node);
  if (transpose_x) {
    x_node = CreateBaseOp(graph, node, "popart_transpose",
                          {GetInputVarNode("X", node)}, {},
                          {{"perm", perm}});
    x_node = x_node->outputs[0];
  }
  if (transpose_y) {
    y_node = CreateBaseOp(graph, node, "popart_transpose",
                          {GetInputVarNode("Y", node)}, {},
                          {{"perm", perm}});
    y_node = y_node->outputs[0];
  }

  if (is_float_equal(alpha, 1.0)) {
    // alpha is a no-op: emit the matmul alone.
    return CreateBaseOp(graph, node, "popart_matmul", {x_node, y_node},
                        node->outputs);
  }
  // Scale the matmul result by alpha.
  auto o_node =
      CreateBaseOp(graph, node, "popart_matmul", {x_node, y_node}, {});
  auto attr = MakeConstAttrMapFromValue(alpha, {1}, ONNXDataType::FLOAT);
  auto const_node = CreateConst(graph, node, {}, {}, attr);
  return CreateBaseOp(graph, node, "popart_mul",
                      {o_node->outputs[0], const_node->outputs[0]},
                      node->outputs);
}
// paddle `sum` takes a variadic input list; popart_sum does the same,
// so the node's inputs/outputs pass through untouched.
Node *sum_handler(Graph *graph, Node *node) {
  return CreateBaseOp(graph, node, "popart_sum", node->inputs,
                      node->outputs);
}

// paddle `softmax` carries an `axis` attribute; delegate to the opset-11
// builder which handles non-trailing axes via transposes.
Node *softmax_handler(Graph *graph, Node *node) {
  auto axis = BOOST_GET_CONST(int, node->Op()->GetAttr("axis"));
  return CreateSoftmaxOpset11(graph, node, node->inputs, node->outputs,
                              axis);
}
// Lowers paddle `scale`: out = scale * x + bias (bias_after_scale) or
// out = scale * (x + bias). The arithmetic is done in float32 — the input
// is cast to FP32 first and the result cast back to the original dtype.
// The scale may come from a runtime ScaleTensor input or from the static
// `scale` attribute (materialized as a constant).
Node *scale_handler(Graph *graph, Node *node) {
  auto *op = node->Op();
  auto scale_attr = BOOST_GET_CONST(float, op->GetAttr("scale"));
  auto bias_attr = BOOST_GET_CONST(float, op->GetAttr("bias"));
  auto bias_after_scale =
      BOOST_GET_CONST(bool, op->GetAttr("bias_after_scale"));
  auto x_dtype = GetInputVarNode("X", node)->Var()->GetDataType();

  // Constant holding the bias.
  Node *bias_node = CreateConst(
      graph, node, {}, {},
      {{"value", std::vector<float>{bias_attr}},
       {"dims", std::vector<int64_t>{1}},
       {"dtype", ONNXDataType::FLOAT}});
  bias_node = bias_node->outputs[0];

  // Scale: prefer the runtime tensor input, fall back to a constant.
  Node *scale_node = nullptr;
  if (op->HasInput("ScaleTensor") && !op->Input("ScaleTensor").empty()) {
    scale_node = GetInputVarNode("ScaleTensor", node);
  } else {
    scale_node = CreateConst(
        graph, node, {}, {},
        {{"value", std::vector<float>{scale_attr}},
         {"dims", std::vector<int64_t>{1}},
         {"dtype", ONNXDataType::FLOAT}});
    scale_node = scale_node->outputs[0];
  }

  // Convert the input to float32 before the arithmetic.
  auto x_fp32 = CreateCast(graph, node, {GetInputVarNode("X", node)}, {},
                           static_cast<int>(framework::proto::VarType::FP32));

  Node *result = nullptr;
  if (bias_after_scale) {
    // scale * x + bias
    auto mul_node = CreateBaseOp(graph, node, "popart_mul",
                                 {x_fp32->outputs[0], scale_node}, {}, {});
    result = CreateBaseOp(graph, node, "popart_add",
                          {mul_node->outputs[0], bias_node}, {}, {});
  } else {
    // scale * (x + bias)
    auto add_node = CreateBaseOp(graph, node, "popart_add",
                                 {x_fp32->outputs[0], bias_node}, {}, {});
    result = CreateBaseOp(graph, node, "popart_mul",
                          {add_node->outputs[0], scale_node}, {}, {});
  }

  // Cast back to the input's original dtype.
  return CreateCast(graph, node, result->outputs, node->outputs,
                    static_cast<int>(x_dtype));
}
// Lowers paddle `cross_entropy2` onto popart_nllloss. Labels are cast to
// INT32 first. popart's nllloss wants a 1-D label tensor, so a rank>1
// label is flattened to [label_shape[0]] before the loss and the loss
// output is reshaped back to the original label shape afterwards.
Node *cross_entropy2_handler(Graph *graph, Node *node) {
  auto *op = node->Op();
  auto ignoreIndex = BOOST_GET_CONST(int, op->GetAttr("ignore_index"));
  auto label_cast =
      CreateCast(graph, node, {GetInputVarNode("Label", node)}, {},
                 framework::proto::VarType::INT32);
  auto label_shape = GetInputVarNode("Label", node)->Var()->GetShape();

  if (label_shape.size() == 1) {
    // Label is already 1-D: wire the loss up directly.
    return CreateBaseOp(graph, node, "popart_nllloss",
                        {GetInputVarNode("X", node), label_cast->outputs[0]},
                        {GetOutputVarNode("Y", node)},
                        {
                            {"ignoreIndex", ignoreIndex},
                        });
  }

  // Flatten label -> [label_shape[0]], run nllloss, reshape loss back.
  std::vector<int64_t> flat_shape{label_shape[0]};
  auto const_before_loss = CreateBaseOp(
      graph, node, "popart_constant", {}, {},
      {{"value", flat_shape},
       {"dims", std::vector<int64_t>{static_cast<int64_t>(flat_shape.size())}},
       {"dtype", ONNXDataType::INT64}});
  auto reshape_before_loss = CreateBaseOp(
      graph, node, "popart_reshape",
      {label_cast->outputs[0], const_before_loss->outputs[0]}, {}, {});
  auto nllloss = CreateBaseOp(
      graph, node, "popart_nllloss",
      {GetInputVarNode("X", node), reshape_before_loss->outputs[0]}, {},
      {
          {"ignoreIndex", ignoreIndex},
      });
  auto const_after_loss = CreateBaseOp(
      graph, node, "popart_constant", {}, {},
      {{"value", label_shape},
       {"dims",
        std::vector<int64_t>{static_cast<int64_t>(label_shape.size())}},
       {"dtype", ONNXDataType::INT64}});
  return CreateBaseOp(graph, node, "popart_reshape",
                      {nllloss->outputs[0], const_after_loss->outputs[0]},
                      {GetOutputVarNode("Y", node)}, {});
}

REGISTER_HANDLER(mean, mean_handler);
REGISTER_HANDLER(pow, pow_handler);
REGISTER_HANDLER(mul, mul_handler);
REGISTER_HANDLER(matmul, matmul_handler);
REGISTER_HANDLER(sum, sum_handler);
REGISTER_HANDLER(softmax, softmax_handler);
REGISTER_HANDLER(scale, scale_handler);
REGISTER_HANDLER(cross_entropy2, cross_entropy2_handler);
}
// namespace
}
// namespace ipu
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/device/ipu/popart_canonicalization/nn_ops.cc
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h"
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
platform
{
namespace
ipu
{
namespace
{
// Lowers paddle `conv2d` onto popart_conv. The int attribute vectors are
// widened to int64_t, a 2-element paddings attr is duplicated to the
// 4-element [begin_h, begin_w, end_h, end_w] form popart expects, and the
// optional Bias input is forwarded when present.
Node *conv2d_handler(Graph *graph, Node *node) {
  OpDesc *op = node->Op();
  auto dilations_i32 =
      BOOST_GET_CONST(std::vector<int>, op->GetAttr("dilations"));
  auto dilations =
      std::vector<int64_t>{dilations_i32.begin(), dilations_i32.end()};
  auto group = BOOST_GET_CONST(int, op->GetAttr("groups"));
  auto pads_i32 = BOOST_GET_CONST(std::vector<int>, op->GetAttr("paddings"));
  if (pads_i32.size() == 2) {
    // Duplicate [h, w] into [h, w, h, w] (symmetric padding).
    pads_i32.push_back(pads_i32[0]);
    pads_i32.push_back(pads_i32[1]);
  }
  auto pads = std::vector<int64_t>{pads_i32.begin(), pads_i32.end()};
  auto strides_i32 =
      BOOST_GET_CONST(std::vector<int>, op->GetAttr("strides"));
  auto strides =
      std::vector<int64_t>{strides_i32.begin(), strides_i32.end()};

  const bool has_bias = op->HasInput("Bias") && !op->Input("Bias").empty();
  if (has_bias) {
    return CreateConv(graph, node,
                      {
                          GetInputVarNode("Input", node),
                          GetInputVarNode("Filter", node),
                          GetInputVarNode("Bias", node),
                      },
                      node->outputs, dilations, group, {}, pads, strides);
  }
  return CreateConv(graph, node,
                    {
                        GetInputVarNode("Input", node),
                        GetInputVarNode("Filter", node),
                    },
                    node->outputs, dilations, group, {}, pads, strides);
}
// Lowers paddle `batch_norm` onto popart_batchnormalization.
// Inference mode (`is_test`) emits only Y; training mode also emits the
// running-stat outputs, for a total of 5. The `is_test` attr may be stored
// as int or bool depending on the producing program, so both are handled.
Node *batch_norm_handler(Graph *graph, Node *node) {
  auto *op = node->Op();
  std::vector<Node *> inputs{
      GetInputVarNode("X", node),
      GetInputVarNode("Scale", node),
      GetInputVarNode("Bias", node),
      GetInputVarNode("Mean", node),
      GetInputVarNode("Variance", node),
  };

  // Read is_test, tolerating both int- and bool-typed attributes.
  bool is_test;
  if (op->GetAttrType("is_test") == 0) {
    // stored as int
    is_test = BOOST_GET_CONST(int, op->GetAttr("is_test"));
  } else {
    // stored as bool
    is_test = BOOST_GET_CONST(bool, op->GetAttr("is_test"));
  }

  int64_t num_outputs = 1;
  std::vector<Node *> outputs;
  outputs.push_back(GetOutputVarNode("Y", node));
  if (!is_test) {
    outputs.push_back(GetOutputVarNode("MeanOut", node));
    outputs.push_back(GetOutputVarNode("VarianceOut", node));
    outputs.push_back(GetOutputVarNode("SavedMean", node));
    outputs.push_back(GetOutputVarNode("SavedVariance", node));
    num_outputs = 5;
  }
  // outputs.push_back(GetOutputVarNode("ReserveSpace", node));
  auto momentum = BOOST_GET_CONST(float, op->GetAttr("momentum"));
  auto epsilon = BOOST_GET_CONST(float, op->GetAttr("epsilon"));
  // data_layout
  return CreateBaseOp(graph, node, "popart_batchnormalization", inputs,
                      outputs,
                      {
                          {"momentum", momentum},
                          {"epsilon", epsilon},
                          {"num_outputs", num_outputs},
                      });
}
// Lowers paddle `pool2d`. Global pooling maps to the popart global pool
// ops; otherwise max/avg pooling maps to popart_maxpool/averagepool with
// kernel/pads/strides widened to int64_t (2-element paddings are
// duplicated to the 4-element form). Only padding_algorithm == "EXPLICIT"
// is supported.
// Fix: corrected the misspelled "unkonwn" -> "unknown" in all three
// error messages.
Node *pool2d_handler(Graph *graph, Node *node) {
  auto *op = node->Op();
  auto pooling_type =
      BOOST_GET_CONST(std::string, op->GetAttr("pooling_type"));
  auto global_pooling = BOOST_GET_CONST(bool, op->GetAttr("global_pooling"));

  if (global_pooling) {
    if (pooling_type == "max") {
      return CreateBaseOp(graph, node, "popart_globalmaxpool", node->inputs,
                          node->outputs);
    } else if (pooling_type == "avg") {
      return CreateBaseOp(graph, node, "popart_globalaveragepool",
                          node->inputs, node->outputs);
    } else {
      PADDLE_THROW(platform::errors::InvalidArgument(
          "op pool2d with unknown pooling_type: %s", pooling_type));
    }
  }
  if (op->HasAttr("padding_algorithm")) {
    auto padding_algorithm =
        BOOST_GET_CONST(std::string, op->GetAttr("padding_algorithm"));
    if (padding_algorithm != "EXPLICIT") {
      PADDLE_THROW(platform::errors::InvalidArgument(
          "op pool2d with unknown padding_algorithm: %s",
          padding_algorithm));
    }
  }

  auto ksize = BOOST_GET_CONST(std::vector<int>, op->GetAttr("ksize"));
  auto kernel_shape = std::vector<int64_t>(ksize.begin(), ksize.end());
  auto ceil_mode_ = BOOST_GET_CONST(bool, op->GetAttr("ceil_mode"));
  auto ceil_mode = int64_t(ceil_mode_ ? 1 : 0);
  auto paddings = BOOST_GET_CONST(std::vector<int>, op->GetAttr("paddings"));
  auto pads = std::vector<int64_t>(paddings.begin(), paddings.end());
  if (pads.size() == 2) {
    // Duplicate [h, w] into [h, w, h, w] (symmetric padding).
    pads.push_back(paddings[0]);
    pads.push_back(paddings[1]);
  }
  auto strides_ = BOOST_GET_CONST(std::vector<int>, op->GetAttr("strides"));
  auto strides = std::vector<int64_t>(strides_.begin(), strides_.end());

  if (pooling_type == "max") {
    int64_t num_outputs = 1;
    auto dilations = std::vector<int64_t>{};
    int64_t storage_order = 0;
    return CreateBaseOp(graph, node, "popart_maxpool", node->inputs,
                        node->outputs,
                        {
                            {"num_outputs", num_outputs},
                            {"kernel_shape", kernel_shape},
                            {"ceil_mode", ceil_mode},
                            {"dilations", dilations},
                            {"pads", pads},
                            {"storage_order", storage_order},
                            {"strides", strides},
                        });
  } else if (pooling_type == "avg") {
    int64_t count_include_pad = 0;
    return CreateBaseOp(graph, node, "popart_averagepool", node->inputs,
                        node->outputs,
                        {
                            {"kernel_shape", kernel_shape},
                            {"ceil_mode", ceil_mode},
                            {"count_include_pad", count_include_pad},
                            {"pads", pads},
                            {"strides", strides},
                        });
  } else {
    PADDLE_THROW(platform::errors::InvalidArgument(
        "op pool2d with unknown pooling_type: %s", pooling_type));
  }
}
// Lowers paddle `group_norm` onto popart_groupnormalization_v2, passing
// epsilon and the group count through and mapping X/Scale/Bias to the
// Y/Mean/Variance output triple.
Node *group_norm_handler(Graph *graph, Node *node) {
  auto *op = node->Op();
  auto eps = BOOST_GET_CONST(float, op->GetAttr("epsilon"));
  auto num_groups = int64_t{BOOST_GET_CONST(int, op->GetAttr("groups"))};
  auto gn_attrs = AttributeMap{{"epsilon", eps}, {"num_groups", num_groups}};

  std::vector<Node *> gn_inputs{
      GetInputVarNode("X", node),
      GetInputVarNode("Scale", node),
      GetInputVarNode("Bias", node),
  };
  std::vector<Node *> gn_outputs{
      GetOutputVarNode("Y", node),
      GetOutputVarNode("Mean", node),
      GetOutputVarNode("Variance", node),
  };
  return CreateBaseOp(graph, node, "popart_groupnormalization_v2",
                      gn_inputs, gn_outputs, gn_attrs);
}
// Lowers paddle `instance_norm` onto popart_instancenormalization with
// X/Scale/Bias inputs and the single Y output; only epsilon is forwarded.
Node *instance_norm_handler(Graph *graph, Node *node) {
  auto eps = BOOST_GET_CONST(float, node->Op()->GetAttr("epsilon"));
  std::vector<Node *> in_inputs{
      GetInputVarNode("X", node),
      GetInputVarNode("Scale", node),
      GetInputVarNode("Bias", node),
  };
  std::vector<Node *> in_outputs{GetOutputVarNode("Y", node)};
  return CreateBaseOp(graph, node, "popart_instancenormalization",
                      in_inputs, in_outputs,
                      AttributeMap{{"epsilon", eps}});
}
// Lowers paddle `layer_norm` by reshaping X to 2-D
// [prod(dims < begin_norm_axis), prod(dims >= begin_norm_axis)], running
// popart_groupnormalization_v2 with a single group (which normalizes over
// the second dim), and reshaping the result back to the input shape.
// Fix: the shape loop used `int i` against input_shape_.size() (size_t),
// a signed/unsigned comparison; it now iterates with size_t.
Node *layer_norm_handler(Graph *graph, Node *node) {
  auto *op = node->Op();
  auto begin_norm_axis_ =
      BOOST_GET_CONST(int, op->GetAttr("begin_norm_axis"));
  auto input_shape_ = GetInputVarNode("X", node)->Var()->GetShape();

  // Collapse the input shape into [outer, inner] around begin_norm_axis.
  std::vector<int64_t> norm_shape_{1, 1};
  for (size_t i = 0; i < input_shape_.size(); i++) {
    if (i < static_cast<size_t>(begin_norm_axis_)) {
      norm_shape_[0] *= input_shape_[i];
    } else {
      norm_shape_[1] *= input_shape_[i];
    }
  }

  // Reshape X -> [outer, inner].
  auto attrs1 = AttributeMap{
      {"value", norm_shape_},
      {"dims", std::vector<int64_t>{static_cast<int64_t>(norm_shape_.size())}},
      {"dtype", ONNXDataType::INT64}};
  auto reshape1_const =
      CreateBaseOp(graph, node, "popart_constant", {}, {}, attrs1);
  auto new_node_reshape1 = CreateBaseOp(
      graph, node, "popart_reshape",
      {GetInputVarNode("X", node), reshape1_const->outputs[0]}, {}, {});

  // Group-norm with a single group == layer norm over the inner dim.
  auto epsilon_ = BOOST_GET_CONST(float, op->GetAttr("epsilon"));
  int64_t groups_ = 1;
  auto groupnorm_attrs_ =
      AttributeMap{{"epsilon", epsilon_}, {"num_groups", groups_}};
  auto out_Y_ = MakeVarNode(graph, node);
  CreateBaseOp(graph, node, "popart_groupnormalization_v2",
               {new_node_reshape1->outputs[0],
                GetInputVarNode("Scale", node),
                GetInputVarNode("Bias", node)},
               {out_Y_, GetOutputVarNode("Mean", node),
                GetOutputVarNode("Variance", node)},
               groupnorm_attrs_);

  // Reshape the normalized result back to the original input shape.
  auto attrs2 = AttributeMap{
      {"value", input_shape_},
      {"dims",
       std::vector<int64_t>{static_cast<int64_t>(input_shape_.size())}},
      {"dtype", ONNXDataType::INT64}};
  auto reshape2_const =
      CreateBaseOp(graph, node, "popart_constant", {}, {}, attrs2);
  auto new_node_reshape2 = CreateBaseOp(
      graph, node, "popart_reshape",
      {out_Y_, reshape2_const->outputs[0]},
      {GetOutputVarNode("Y", node)}, {});
  return new_node_reshape2;
}
// Lowers paddle `dropout`. In inference mode "upscale_in_train" is an
// identity and "downgrade_in_infer" multiplies by (1 - dropout_prob); in
// training mode "upscale_in_train" maps to popart_dropout and
// "downgrade_in_infer" is unsupported. The `is_test` attr may be stored
// as int or bool depending on the producing program.
Node *dropout_handler(Graph *graph, Node *node) {
  auto *op = node->Op();
  auto dropout_prob_ = BOOST_GET_CONST(float, op->GetAttr("dropout_prob"));
  auto dropout_implementation_ =
      BOOST_GET_CONST(std::string, op->GetAttr("dropout_implementation"));
  // Read is_test, tolerating both int- and bool-typed attributes.
  bool is_test_;
  if (op->GetAttrType("is_test") == 0) {
    // stored as int
    is_test_ = BOOST_GET_CONST(int, op->GetAttr("is_test"));
  } else {
    // stored as bool
    is_test_ = BOOST_GET_CONST(bool, op->GetAttr("is_test"));
  }

  if (is_test_) {
    if (dropout_implementation_ == "upscale_in_train") {
      return CreateBaseOp(graph, node, "popart_identity",
                          {GetInputVarNode("X", node)},
                          {GetOutputVarNode("Out", node)}, {});
    }
    if (dropout_implementation_ == "downgrade_in_infer") {
      auto keep_scale = CreateConst(
          graph, node, {}, {},
          {{"value", std::vector<float>{1 - dropout_prob_}},
           {"dims", std::vector<int64_t>{1}},
           {"dtype", ONNXDataType::FLOAT}});
      return CreateBaseOp(graph, node, "popart_mul",
                          {GetInputVarNode("X", node),
                           keep_scale->outputs[0]},
                          {GetOutputVarNode("Out", node)}, {});
    }
    PADDLE_THROW(platform::errors::InvalidArgument(
        "Invalid dropout_implementation"));
  } else {
    if (dropout_implementation_ == "upscale_in_train") {
      auto attrs_ = AttributeMap{{"num_outputs", (int64_t)1},
                                 {"ratio", dropout_prob_}};
      return CreateBaseOp(graph, node, "popart_dropout",
                          {GetInputVarNode("X", node)},
                          {GetOutputVarNode("Out", node)}, attrs_);
    }
    if (dropout_implementation_ == "downgrade_in_infer") {
      PADDLE_THROW(platform::errors::InvalidArgument(
          "Do not support downgrade_in_infer with training"));
    }
    PADDLE_THROW(platform::errors::InvalidArgument(
        "Invalid dropout_implementation"));
  }
}

REGISTER_HANDLER(pool2d, pool2d_handler);
REGISTER_HANDLER(batch_norm, batch_norm_handler);
REGISTER_HANDLER(group_norm, group_norm_handler);
REGISTER_HANDLER(instance_norm, instance_norm_handler);
REGISTER_HANDLER(layer_norm, layer_norm_handler);
REGISTER_HANDLER(conv2d, conv2d_handler);
REGISTER_HANDLER(dropout, dropout_handler);
}
// namespace
}
// namespace ipu
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.cc
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.h"
namespace
paddle
{
namespace
platform
{
namespace
ipu
{
// singleton counters: each generated var/op name gets a fresh suffix so
// nodes created by the canonicalization pass never collide.
static int var_count = 0;
static int op_count = 0;

const std::string GenerateVarName() {
  return "_gen_var_" + std::to_string(var_count++);
}

const std::string GenerateOpName() {
  return "_gen_op_" + std::to_string(op_count++);
}

const std::string CreateOpIdentifyId(Node *node) {
  // format: op_type|out_var0|out_var1|...|_gen_*
  // this name will be used as op name when exporting onnx model from popart
  auto op_type = node->Name();
  std::string op_out = "";
  for (auto *out_node : node->outputs) {
    op_out += "|";
    op_out += out_node->Name();
  }
  return {op_type + op_out + "|" + GenerateOpName()};
}
// Creates a fresh, uniquely-named variable node in `graph`.
// NOTE(review): the `node` parameter is currently unused here — kept for
// signature parity with the other builders.
Node *MakeVarNode(Graph *graph, Node *node) {
  auto fresh_name = GenerateVarName();
  auto fresh_desc = std::make_unique<framework::VarDesc>(fresh_name);
  return graph->CreateVarNode(fresh_desc.get());
}
// Creates a new op node of `type` in `graph`, wires it to `inputs` and
// `outputs` (synthesizing one fresh output var when `outputs` is empty),
// and records the connected names under the "__inputs__"/"__outputs__"
// keys of the new op's desc.
// Fix: the two name-collection loops declared `auto node`, shadowing the
// `node` parameter; they now use distinct names (and iterate by pointer
// explicitly).
Node *MakeOpNode(Graph *graph, Node *node, const std::string &type,
                 const std::vector<Node *> &inputs,
                 const std::vector<Node *> &outputs) {
  auto op_desc = std::make_unique<framework::OpDesc>();
  op_desc->SetType(type);
  auto op = graph->CreateOpNode(op_desc.get());
  for (auto *in : inputs) {
    ConnectNodes(in, op);
  }
  if (outputs.empty()) {
    // No outputs given: synthesize a fresh var so the op has one.
    auto var = MakeVarNode(graph, node);
    ConnectNodes(op, var);
  } else {
    for (auto *out : outputs) {
      ConnectNodes(op, out);
    }
  }
  // i/o: mirror the connected node names into the op desc.
  std::vector<std::string> input_names;
  for (auto *in_node : op->inputs) {
    input_names.push_back(in_node->Name());
  }
  op->Op()->SetInput("__inputs__", input_names);
  std::vector<std::string> output_names;
  for (auto *out_node : op->outputs) {
    output_names.push_back(out_node->Name());
  }
  op->Op()->SetOutput("__outputs__", output_names);
  op->Op()->Flush();
  return op;
}
// Creates an op node via MakeOpNode, applies `attrs`, inherits the IPU
// index/stage attrs from the original `node` when the new op lacks them,
// and stamps a unique identify-id derived from the original node.
Node *CreateBaseOp(Graph *graph, Node *node, const std::string &type,
                   const std::vector<Node *> &inputs,
                   const std::vector<Node *> &outputs,
                   const AttributeMap &attrs) {
  auto new_node = MakeOpNode(graph, node, type, inputs, outputs);
  if (!attrs.empty()) {
    new_node->Op()->SetAttrMap(attrs);
  }
  // deal special attr: propagate IPU placement from the source op.
  auto *src_op = node->Op();
  auto *dst_op = new_node->Op();
  if (!dst_op->HasAttr(sIpuIndexAttr)) {
    CopyOpAttr(sIpuIndexAttr, src_op, dst_op);
  }
  if (!dst_op->HasAttr(sIpuStageAttr)) {
    CopyOpAttr(sIpuStageAttr, src_op, dst_op);
  }
  dst_op->SetAttr(sOpIdentifyIdAttr, CreateOpIdentifyId(node));
  dst_op->Flush();
  return new_node;
}
// Convenience wrapper: a popart_constant node carrying
// value/dims/dtype in `attrs`.
Node *CreateConst(Graph *graph, Node *node,
                  const std::vector<Node *> &inputs,
                  const std::vector<Node *> &outputs,
                  const AttributeMap &attrs) {
  return CreateBaseOp(graph, node, "popart_constant", inputs, outputs,
                      attrs);
}

// Convenience wrapper: a popart_cast node; `otype` is a
// proto::VarType::Type value translated to popart's string form.
Node *CreateCast(Graph *graph, Node *node,
                 const std::vector<Node *> &inputs,
                 const std::vector<Node *> &outputs, const int otype) {
  auto target = VarType2PopStr(otype);
  return CreateBaseOp(graph, node, "popart_cast", inputs, outputs,
                      {{"to", target}});
}
// Emits a popart_gemm with explicit alpha/beta/transA/transB attributes.
Node *CreateGemm(Graph *graph, Node *node,
                 const std::vector<Node *> &inputs,
                 const std::vector<Node *> &outputs, int64_t transA,
                 int64_t transB, float alpha, float beta) {
  auto gemm_attrs = AttributeMap{
      {"alpha", alpha},
      {"beta", beta},
      {"transA", transA},
      {"transB", transB},
  };
  return CreateBaseOp(graph, node, "popart_gemm", inputs, outputs,
                      gemm_attrs);
}

// Emits a popart_reshape of inputs[0] to `oshape`, routing the target
// shape through a popart_constant as popart requires.
Node *CreateReshape(Graph *graph, Node *node,
                    const std::vector<Node *> &inputs,
                    const std::vector<Node *> &outputs,
                    const std::vector<int64_t> &oshape) {
  auto shape_attrs = AttributeMap{
      {"value", oshape},
      {"dims", std::vector<int64_t>{static_cast<int64_t>(oshape.size())}},
      {"dtype", ONNXDataType::INT64}};
  auto shape_const =
      CreateBaseOp(graph, node, "popart_constant", {}, {}, shape_attrs);
  return CreateBaseOp(graph, node, "popart_reshape",
                      {inputs[0], shape_const->outputs[0]}, outputs);
}
// Emits a popart_conv with the standard convolution attribute set
// (dilations / group / kernel_shape / pads / strides).
Node *CreateConv(Graph *graph, Node *node,
                 const std::vector<Node *> &inputs,
                 const std::vector<Node *> &outputs,
                 const std::vector<int64_t> &dilations, int64_t group,
                 const std::vector<int64_t> &kernel_shape,
                 const std::vector<int64_t> &pads,
                 const std::vector<int64_t> &strides) {
  auto conv_attrs = AttributeMap{
      {"dilations", dilations},
      {"group", group},
      {"kernel_shape", kernel_shape},
      {"pads", pads},
      {"strides", strides},
  };
  return CreateBaseOp(graph, node, "popart_conv", inputs, outputs,
                      conv_attrs);
}
// Emits softmax over `axis` using opset-11 popart_softmax, which only
// normalizes the last axis: a non-trailing axis is handled by swapping it
// with the last axis via popart_transpose before and after the softmax.
// Negative axes are normalized against the input rank.
Node *CreateSoftmaxOpset11(Graph *graph, Node *node,
                           const std::vector<Node *> &inputs,
                           const std::vector<Node *> &outputs,
                           int64_t axis) {
  PADDLE_ENFORCE_EQ(inputs.size(), 1,
                    platform::errors::InvalidArgument(
                        "Softmax op only support one input"));
  auto x_shape = inputs[0]->Var()->GetShape();
  int x_rank = x_shape.size();
  if (axis < 0) {
    axis = axis + x_rank;
  }
  if (axis == x_rank - 1) {
    // Already on the trailing axis: one popart_softmax suffices.
    return CreateBaseOp(graph, node, "popart_softmax", inputs, outputs,
                        {{"axis", int64_t{-1}}});
  }
  // Swap `axis` with the last axis, softmax, then swap back (the
  // permutation is its own inverse).
  auto perm = std::vector<int64_t>(x_rank);
  std::iota(perm.begin(), perm.end(), 0);
  perm[x_rank - 1] = axis;
  perm[axis] = x_rank - 1;
  auto pre_transpose = CreateBaseOp(graph, node, "popart_transpose",
                                    inputs, {}, {{"perm", perm}});
  auto softmax_node =
      CreateBaseOp(graph, node, "popart_softmax", pre_transpose->outputs,
                   {}, {{"axis", int64_t{-1}}});
  return CreateBaseOp(graph, node, "popart_transpose",
                      softmax_node->outputs, outputs, {{"perm", perm}});
}
}
// namespace ipu
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.h
0 → 100644
浏览文件 @
883ee1a3
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/platform/device/ipu/common.h"
#include "paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h"
namespace
paddle
{
namespace
platform
{
namespace
ipu
{
using
paddle
::
framework
::
AttributeMap
;
// Assembles the attribute map consumed by a popart constant op: the
// flattened payload ("value"), its shape ("dims"), and its element type
// tag ("dtype").
template <typename T>
AttributeMap MakeConstAttrMap(std::vector<T> value, std::vector<int64_t> dims,
                              int dtype) {
  AttributeMap attrs;
  attrs["value"] = value;
  attrs["dims"] = dims;
  attrs["dtype"] = dtype;
  return attrs;
}
// Builds the attribute map for a constant op whose payload is the scalar `v`
// repeated once per element of the shape `dims`.
template <typename T>
AttributeMap MakeConstAttrMapFromValue(T v, std::vector<int64_t> dims,
                                       int dtype) {
  // Element count is the product of all dims; an empty shape yields 1
  // (a scalar constant).
  size_t numel = 1;
  for (size_t i = 0; i < dims.size(); ++i) {
    numel *= static_cast<size_t>(dims[i]);
  }
  return MakeConstAttrMap<T>(std::vector<T>(numel, v), dims, dtype);
}
// NOTE(review): returns a generated name for a new variable node —
// presumably unique within the graph; confirm in the implementation.
const std::string GenerateVarName();

// Builds an identifier string for the popart op generated from `node`
// (used to tag the op; see the implementation for the exact format).
const std::string CreateOpIdentifyId(Node *node);

// Creates a new variable node in `graph`, modeled after `node`.
Node *MakeVarNode(Graph *graph, Node *node);

// Creates an op node of `type` in `graph` and wires it to the given input
// and output variable nodes.
Node *MakeOpNode(Graph *graph, Node *node, const std::string &type,
                 const std::vector<Node *> &inputs,
                 const std::vector<Node *> &outputs);

// Generic builder for a popart op `type` with attribute map `attrs`.
// The Conv/Softmax helpers in this file delegate to it.
Node *CreateBaseOp(Graph *graph, Node *node, const std::string &type,
                   const std::vector<Node *> &inputs,
                   const std::vector<Node *> &outputs,
                   const AttributeMap &attrs = {});

// Creates a popart constant; `attrs` is expected to carry the
// "value"/"dims"/"dtype" entries (see MakeConstAttrMap above).
Node *CreateConst(Graph *graph, Node *node, const std::vector<Node *> &inputs,
                  const std::vector<Node *> &outputs,
                  const AttributeMap &attrs);

// otype is proto::VarType::Type
Node *CreateCast(Graph *graph, Node *node, const std::vector<Node *> &inputs,
                 const std::vector<Node *> &outputs, const int otype);

// Creates a popart Gemm: alpha * op(A) * op(B) + beta * C, where transA and
// transB select whether A/B are transposed (0 = no transpose).
Node *CreateGemm(Graph *graph, Node *node, const std::vector<Node *> &inputs,
                 const std::vector<Node *> &outputs, int64_t transA = 0,
                 int64_t transB = 0, float alpha = 1.0f, float beta = 1.0f);

// Creates a reshape of the single input to the target shape `oshape`.
Node *CreateReshape(Graph *graph, Node *node,
                    const std::vector<Node *> &inputs,
                    const std::vector<Node *> &outputs,
                    const std::vector<int64_t> &oshape);

// Creates a popart convolution.  Defaults describe a plain 2-D conv:
// unit dilations/strides, a single group, and zero padding
// (pads layout follows the popart/ONNX convention — begin/end per axis).
Node *CreateConv(Graph *graph, Node *node, const std::vector<Node *> &inputs,
                 const std::vector<Node *> &outputs,
                 const std::vector<int64_t> &dilations = {1, 1},
                 int64_t group = 1,
                 const std::vector<int64_t> &kernel_shape = {},
                 const std::vector<int64_t> &pads = {0, 0, 0, 0},
                 const std::vector<int64_t> &strides = {1, 1});

// Creates a softmax over `axis` with opset-11 semantics; inserts transposes
// when `axis` is not the last dimension (see the definition for details).
Node *CreateSoftmaxOpset11(Graph *graph, Node *node,
                           const std::vector<Node *> &inputs,
                           const std::vector<Node *> &outputs, int64_t axis);
}
// namespace ipu
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/device/ipu/supported_ops_autogen.h
浏览文件 @
883ee1a3
...
...
@@ -195,3 +195,5 @@ OP_DECL(popart_sqrt, aiOnnxOpset.sqrt, NONE) // NOLINT
OP_DECL
(
popart_tanh
,
aiOnnxOpset
.
tanh
,
NONE
)
// NOLINT
OP_DECL
(
popart_tile
,
aiOnnxOpset
.
tile
,
NONE
)
// NOLINT
OP_DECL
(
popart_transpose
,
aiOnnxOpset
.
transpose
,
ARG
(
INT_VEC
,
perm
)
)
// NOLINT
// clang-format on
paddle/fluid/platform/device_context.cc
浏览文件 @
883ee1a3
...
...
@@ -16,6 +16,9 @@ limitations under the License. */
#include "paddle/fluid/memory/allocation/cuda_device_context_allocator.h"
#include "paddle/fluid/platform/cuda_device_guard.h"
#endif
#ifdef PADDLE_WITH_IPU
#include "paddle/fluid/platform/ipu/ipu_backend.h"
#endif
#include "glog/logging.h"
#include "paddle/fluid/platform/profiler.h"
...
...
@@ -96,8 +99,9 @@ platform::DeviceContext* DeviceContextPool::Get(const platform::Place& place) {
if
(
it
==
device_contexts_
.
end
())
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"Place %s is not supported. Please check that your paddle compiles "
"with WITH_GPU, WITH_XPU or WITH_ASCEND_CL option or check that "
"your train process set the correct device id if you use Executor."
,
"with WITH_GPU, WITH_XPU, WITH_IPU or WITH_ASCEND_CL option or check "
"that your train process set the correct device id if you use "
"Executor."
,
place
));
}
return
it
->
second
.
get
().
get
();
...
...
@@ -158,6 +162,14 @@ DeviceContextPool::DeviceContextPool(
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"XPUPlace is not supported. Please "
"re-compile with WITH_XPU option."
));
#endif
}
else
if
(
platform
::
is_ipu_place
(
p
))
{
#ifdef PADDLE_WITH_IPU
EmplaceDeviceContext
<
IPUDeviceContext
,
IPUPlace
>
(
&
device_contexts_
,
p
);
#else
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"IPUPlace is not supported. Please "
"re-compile with WITH_IPU option."
));
#endif
}
else
if
(
platform
::
is_npu_place
(
p
))
{
#ifdef PADDLE_WITH_ASCEND_CL
...
...
@@ -195,6 +207,22 @@ Eigen::DefaultDevice* CPUDeviceContext::eigen_device() const {
Place
CPUDeviceContext
::
GetPlace
()
const
{
return
place_
;
}
#ifdef PADDLE_WITH_IPU
IPUDeviceContext
::
IPUDeviceContext
(
IPUPlace
place
)
:
place_
(
place
)
{
int
id
=
place
.
GetDeviceId
();
std
::
shared_ptr
<
platform
::
ipu
::
IpuBackend
>
ipu_backend
=
platform
::
ipu
::
IpuBackend
::
GetInstance
();
device_
=
ipu_backend
->
GetDevice
(
id
);
}
Place
IPUDeviceContext
::
GetPlace
()
const
{
return
place_
;
}
void
IPUDeviceContext
::
Wait
()
const
{
/*! \brief Wait for all operations completion in the stream. */
}
IPUDeviceContext
::~
IPUDeviceContext
()
{}
#endif
#ifdef PADDLE_WITH_XPU
XPUDeviceContext
::
XPUDeviceContext
()
{
context_
=
xpu
::
create_context
();
...
...
paddle/fluid/platform/place.cc
浏览文件 @
883ee1a3
...
...
@@ -36,6 +36,7 @@ class PlacePrinter : public boost::static_visitor<> {
void
operator
()(
const
XPUPlace
&
p
)
{
os_
<<
"XPUPlace("
<<
p
.
device
<<
")"
;
}
void
operator
()(
const
NPUPlace
&
p
)
{
os_
<<
"NPUPlace("
<<
p
.
device
<<
")"
;
}
void
operator
()(
const
NPUPinnedPlace
&
p
)
{
os_
<<
"NPUPinnedPlace"
;
}
void
operator
()(
const
IPUPlace
&
p
)
{
os_
<<
"IPUPlace("
<<
p
.
device
<<
")"
;
}
void
operator
()(
const
CUDAPinnedPlace
&
p
)
{
os_
<<
"CUDAPinnedPlace"
;
}
private:
...
...
@@ -56,6 +57,10 @@ bool is_npu_place(const Place &p) {
return
boost
::
apply_visitor
(
IsNPUPlace
(),
p
);
}
// Returns true iff the Place variant `p` currently holds an IPUPlace.
bool is_ipu_place(const Place &p) {
  return boost::apply_visitor(IsIPUPlace(), p);
}
bool
is_cpu_place
(
const
Place
&
p
)
{
return
boost
::
apply_visitor
(
IsCPUPlace
(),
p
);
}
...
...
@@ -80,6 +85,8 @@ bool is_same_place(const Place &p1, const Place &p2) {
return
BOOST_GET_CONST
(
XPUPlace
,
p1
)
==
BOOST_GET_CONST
(
XPUPlace
,
p2
);
}
else
if
(
is_npu_place
(
p1
))
{
return
BOOST_GET_CONST
(
NPUPlace
,
p1
)
==
BOOST_GET_CONST
(
NPUPlace
,
p2
);
}
else
if
(
is_ipu_place
(
p1
))
{
return
BOOST_GET_CONST
(
IPUPlace
,
p1
)
==
BOOST_GET_CONST
(
IPUPlace
,
p2
);
}
else
{
return
BOOST_GET_CONST
(
CUDAPlace
,
p1
)
==
BOOST_GET_CONST
(
CUDAPlace
,
p2
);
}
...
...
paddle/fluid/platform/place.h
浏览文件 @
883ee1a3
...
...
@@ -95,12 +95,25 @@ struct NPUPinnedPlace {
inline
bool
operator
!=
(
const
NPUPinnedPlace
&
)
const
{
return
false
;
}
inline
bool
operator
<
(
const
NPUPinnedPlace
&
)
const
{
return
false
;
}
};
// Identifies a single IPU device by its integer id.  Default-constructed
// instances refer to device 0.
struct IPUPlace {
  IPUPlace() : IPUPlace(0) {}
  explicit IPUPlace(int d) : device(d) {}

  inline int GetDeviceId() const { return device; }

  // needed for variant equality comparison
  inline bool operator==(const IPUPlace &o) const {
    return device == o.device;
  }
  inline bool operator!=(const IPUPlace &o) const {
    return device != o.device;
  }
  // Ordering by device id (used when places are stored in ordered sets/maps).
  inline bool operator<(const IPUPlace &o) const { return device < o.device; }

  int device;
};
// Visitor for the Place variant: yields true only for the CUDAPlace
// alternative, false for every other place type.
struct IsCUDAPlace : public boost::static_visitor<bool> {
  bool operator()(const CPUPlace &) const { return false; }
  bool operator()(const XPUPlace &) const { return false; }
  bool operator()(const NPUPlace &) const { return false; }
  bool operator()(const NPUPinnedPlace &) const { return false; }
  bool operator()(const IPUPlace &) const { return false; }
  bool operator()(const CUDAPlace &) const { return true; }
  bool operator()(const CUDAPinnedPlace &) const { return false; }
};
...
...
@@ -110,6 +123,7 @@ struct IsCPUPlace : public boost::static_visitor<bool> {
bool
operator
()(
const
XPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
NPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
NPUPinnedPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
IPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
CUDAPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
CUDAPinnedPlace
&
)
const
{
return
false
;
}
};
...
...
@@ -119,6 +133,7 @@ struct IsCUDAPinnedPlace : public boost::static_visitor<bool> {
bool
operator
()(
const
XPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
NPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
NPUPinnedPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
IPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
CUDAPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
CUDAPinnedPlace
&
cuda_pinned
)
const
{
return
true
;
}
};
...
...
@@ -128,6 +143,7 @@ struct IsXPUPlace : public boost::static_visitor<bool> {
bool
operator
()(
const
XPUPlace
&
)
const
{
return
true
;
}
bool
operator
()(
const
NPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
NPUPinnedPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
IPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
CUDAPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
CUDAPinnedPlace
&
)
const
{
return
false
;
}
};
...
...
@@ -137,6 +153,7 @@ struct IsNPUPlace : public boost::static_visitor<bool> {
bool
operator
()(
const
XPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
NPUPlace
&
)
const
{
return
true
;
}
bool
operator
()(
const
NPUPinnedPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
IPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
CUDAPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
CUDAPinnedPlace
&
)
const
{
return
false
;
}
};
...
...
@@ -145,22 +162,33 @@ struct IsNPUPinnedPlace : public boost::static_visitor<bool> {
bool
operator
()(
const
CPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
XPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
NPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
IPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
CUDAPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
CUDAPinnedPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
NPUPinnedPlace
&
)
const
{
return
true
;
}
};
// Visitor for the Place variant: yields true only for the IPUPlace
// alternative, false for every other place type.
struct IsIPUPlace : public boost::static_visitor<bool> {
  bool operator()(const CPUPlace &) const { return false; }
  bool operator()(const XPUPlace &) const { return false; }
  bool operator()(const NPUPlace &) const { return false; }
  bool operator()(const IPUPlace &) const { return true; }
  bool operator()(const CUDAPlace &) const { return false; }
  bool operator()(const CUDAPinnedPlace &) const { return false; }
  bool operator()(const NPUPinnedPlace &) const { return false; }
};
class
Place
:
public
boost
::
variant
<
CUDAPlace
,
XPUPlace
,
NPUPlace
,
CPUPlace
,
CUDAPinnedPlace
,
NPUPinnedPlace
>
{
CUDAPinnedPlace
,
NPUPinnedPlace
,
IPUPlace
>
{
private:
using
PlaceBase
=
boost
::
variant
<
CUDAPlace
,
XPUPlace
,
NPUPlace
,
CPUPlace
,
CUDAPinnedPlace
,
NPUPinnedPlace
>
;
CUDAPinnedPlace
,
NPUPinnedPlace
,
IPUPlace
>
;
public:
Place
()
=
default
;
Place
(
const
CPUPlace
&
cpu_place
)
:
PlaceBase
(
cpu_place
)
{}
// NOLINT
Place
(
const
XPUPlace
&
xpu_place
)
:
PlaceBase
(
xpu_place
)
{}
// NOLINT
Place
(
const
NPUPlace
&
npu_place
)
:
PlaceBase
(
npu_place
)
{}
// NOLINT
Place
(
const
IPUPlace
&
ipu_place
)
:
PlaceBase
(
ipu_place
)
{}
// NOLINT
Place
(
const
CUDAPlace
&
cuda_place
)
:
PlaceBase
(
cuda_place
)
{}
// NOLINT
Place
(
const
CUDAPinnedPlace
&
cuda_pinned_place
)
// NOLINT
:
PlaceBase
(
cuda_pinned_place
)
{}
...
...
@@ -180,6 +208,7 @@ using PlaceList = std::vector<Place>;
bool
is_gpu_place
(
const
Place
&
);
bool
is_xpu_place
(
const
Place
&
);
bool
is_npu_place
(
const
Place
&
);
bool
is_ipu_place
(
const
Place
&
);
bool
is_cpu_place
(
const
Place
&
);
bool
is_cuda_pinned_place
(
const
Place
&
);
bool
is_npu_pinned_place
(
const
Place
&
);
...
...
@@ -228,6 +257,15 @@ struct PlaceVisitorWrapper
return
typename
Visitor
::
result_type
();
#endif
}
// Dispatches the wrapped visitor on an IPUPlace.  In builds without IPU
// support such a place can never be valid, so throw instead of visiting.
typename Visitor::result_type operator()(const IPUPlace &ipu) const {
#ifdef PADDLE_WITH_IPU
  return visitor_(ipu);
#else
  PADDLE_THROW(platform::errors::Unavailable(
      "Paddle is not compiled with IPU. Cannot visit ipu device"));
  // Unreachable after the throw; satisfies the return-type requirement.
  return typename Visitor::result_type();
#endif
}
typename
Visitor
::
result_type
operator
()(
const
CUDAPlace
&
cuda
)
const
{
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
...
...
paddle/fluid/pybind/.gitignore
浏览文件 @
883ee1a3
pybind.h
op_function_impl.h
\ No newline at end of file
op_function_impl.h
eager_op_function_impl.h
paddle/fluid/pybind/pybind.cc
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
paddle/fluid/pybind/reader_py.cc
浏览文件 @
883ee1a3
...
...
@@ -37,6 +37,9 @@ PADDLE_DEFINE_EXPORTED_bool(
"If set true, the queue.pop will only get data from queue but not "
"remove the data from queue for speed testing"
);
// disable auto conversion to list in Python
PYBIND11_MAKE_OPAQUE
(
paddle
::
framework
::
LoDTensorArray
);
namespace
paddle
{
namespace
pybind
{
...
...
paddle/fluid/pybind/tensor_py.h
浏览文件 @
883ee1a3
...
...
@@ -313,6 +313,21 @@ void SetTensorFromPyArrayT(
PADDLE_THROW
(
platform
::
errors
::
PermissionDenied
(
"Cannot use XPUPlace in CPU/GPU version, "
"Please recompile or reinstall Paddle with XPU support."
));
#endif
}
else
if
(
paddle
::
platform
::
is_ipu_place
(
place
))
{
#ifdef PADDLE_WITH_IPU
if
(
zero_copy
)
{
auto
holder
=
std
::
make_shared
<
details
::
NumpyAllocation
<
T
>>
(
array
);
auto
type
=
framework
::
ToDataType
(
std
::
type_index
(
typeid
(
T
)));
self
->
ResetHolderWithType
(
holder
,
type
);
}
else
{
auto
dst
=
self
->
mutable_data
<
T
>
(
place
);
std
::
memcpy
(
dst
,
array
.
data
(),
array
.
nbytes
());
}
#else
PADDLE_THROW
(
platform
::
errors
::
PermissionDenied
(
"Cannot use IPUPlace in CPU/GPU/XPU/NPU version, "
"Please recompile or reinstall Paddle with IPU support."
));
#endif
}
else
if
(
paddle
::
platform
::
is_npu_place
(
place
))
{
#ifdef PADDLE_WITH_ASCEND_CL
...
...
paddle/pten/api/lib/CMakeLists.txt
浏览文件 @
883ee1a3
...
...
@@ -22,6 +22,10 @@ set(api_source_file ${CMAKE_SOURCE_DIR}/paddle/pten/api/lib/api.cc)
set
(
api_header_file_tmp
${
api_header_file
}
.tmp
)
set
(
api_source_file_tmp
${
api_source_file
}
.tmp
)
if
(
NOT PYTHON_EXECUTABLE
)
find_package
(
PythonInterp REQUIRED
)
endif
()
add_custom_command
(
OUTPUT
${
api_header_file
}
${
api_source_file
}
COMMAND
${
PYTHON_EXECUTABLE
}
-m pip install pyyaml
...
...
paddle/pten/api/lib/kernel_declare.h
0 → 100644
浏览文件 @
883ee1a3
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/pten/core/kernel_registry.h"
// TODO(chenweihang) After the kernel is split into a single file,
// the kernel declare statement is automatically generated according to the
// file name of the kernel, and this header file will be removed
PT_DECLARE_KERNEL
(
full_like
,
CPU
);
PT_DECLARE_KERNEL
(
dot
,
CPU
);
PT_DECLARE_KERNEL
(
flatten
,
CPU
);
PT_DECLARE_KERNEL
(
sign
,
CPU
);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PT_DECLARE_KERNEL
(
full_like
,
CUDA
);
PT_DECLARE_KERNEL
(
dot
,
CUDA
);
PT_DECLARE_KERNEL
(
flatten
,
CUDA
);
PT_DECLARE_KERNEL
(
sign
,
CUDA
);
#endif
#ifdef PADDLE_WITH_XPU
PT_DECLARE_KERNEL
(
flatten
,
XPU
);
#endif
paddle/pten/api/lib/utils.cc
浏览文件 @
883ee1a3
...
...
@@ -25,10 +25,14 @@ limitations under the License. */
#include "paddle/pten/include/core.h"
#include "paddle/pten/include/infermeta.h"
PT_DECLARE_
MODULE
(
Utils
CPU
);
PT_DECLARE_
KERNEL
(
copy
,
CPU
);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PT_DECLARE_MODULE
(
UtilsCUDA
);
PT_DECLARE_KERNEL
(
copy
,
CUDA
);
#endif
#ifdef PADDLE_WITH_XPU
PT_DECLARE_KERNEL
(
copy
,
XPU
);
#endif
namespace
paddle
{
...
...
paddle/pten/core/kernel_alias_name.h
浏览文件 @
883ee1a3
...
...
@@ -27,13 +27,13 @@ const std::unordered_map<std::string, std::string> kernel_alias_name_map = {
{
"fill_any_like"
,
"full_like"
},
{
"fill_constant"
,
"full"
},
{
"flatten_contiguous_range"
,
"flatten"
},
//
{"matmul_v2", "matmul"},
{
"matmul_v2"
,
"matmul"
},
{
"reduce_mean"
,
"mean"
},
{
"reduce_sum"
,
"sum"
},
{
"reshape2"
,
"reshape"
},
// fluid kernel "mean/reshape/matmul/flatten/sum" should be deprecated
{
"flatten"
,
"deprecated"
},
//
{"matmul", "deprecated"},
{
"matmul"
,
"deprecated"
},
{
"mean"
,
"deprecated"
},
{
"reshape"
,
"deprecated"
},
{
"sum"
,
"deprecated"
}};
...
...
paddle/pten/core/kernel_factory.h
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
paddle/pten/core/kernel_registry.h
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
paddle/pten/kernels/cpu/creation.cc
浏览文件 @
883ee1a3
...
...
@@ -61,9 +61,7 @@ void FillConstant(const CPUContext& dev_ctx,
}
// namespace pten
PT_REGISTER_MODULE
(
CreationCPU
);
PT_REGISTER_KERNEL
(
"full_like"
,
PT_REGISTER_KERNEL
(
full_like
,
CPU
,
ANY
,
pten
::
FillAnyLike
,
...
...
@@ -74,7 +72,7 @@ PT_REGISTER_KERNEL("full_like",
bool
,
paddle
::
platform
::
float16
)
{}
PT_REGISTER_KERNEL
(
"full"
,
PT_REGISTER_KERNEL
(
full
,
CPU
,
ANY
,
pten
::
FillConstant
,
...
...
paddle/pten/kernels/cpu/linalg.cc
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
paddle/pten/kernels/cpu/manipulation.cc
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
paddle/pten/kernels/cpu/math.cc
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
paddle/pten/kernels/cpu/utils.cc
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
paddle/pten/kernels/cuda/creation.cu
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
paddle/pten/kernels/cuda/linalg.cu
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
paddle/pten/kernels/cuda/manipulation.cu
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
paddle/pten/kernels/cuda/math.cu
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
paddle/pten/kernels/cuda/utils.cu
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
paddle/pten/kernels/xpu/manipulation.cc
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
paddle/pten/kernels/xpu/utils.cc
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
paddle/pten/tests/api/test_reshape_api.cc
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/__init__.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/device/__init__.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/distributed/auto_parallel/operators/dist_matmul.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/sharding_optimizer_stage2.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage2.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/distributed/fleet/meta_parallel/sharding/sharding_utils.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/distributed/fleet/utils/internal_storage.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/distribution.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/fluid/__init__.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/fluid/contrib/slim/quantization/imperative/qat.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_while.py
0 → 100644
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/fluid/dataloader/dataloader_iter.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/fluid/dygraph/dygraph_to_static/utils.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/fluid/dygraph/varbase_patch_methods.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/fluid/executor.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/fluid/reader.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/fluid/tests/unittests/dygraph_sharding_stage2.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/fluid/tests/unittests/dygraph_sharding_stage2_offload.py
0 → 100644
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/fluid/tests/unittests/ir/inference/auto_scan_test.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/fluid/tests/unittests/ir/inference/test_adaptive_pool2d_convert_global_pass_autoscan.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/fluid/tests/unittests/ir/inference/test_fc_elementwise_layernorm_fuse_pass.py
0 → 100644
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/fluid/tests/unittests/ir/inference/test_transpose_flatten_concat_fuse_pass.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/fluid/tests/unittests/test_auto_search_dist_matmul_op.py
0 → 100644
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/fluid/tests/unittests/test_complex_view_op.py
0 → 100644
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/fluid/tests/unittests/test_cuda_graph.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/fluid/tests/unittests/test_distribution.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/fluid/tests/unittests/test_dygraph_sharding_stage2.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/fluid/tests/unittests/test_fleet_executor.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/fluid/tests/unittests/test_gcd.py
0 → 100644
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/fluid/tests/unittests/test_lcm.py
0 → 100644
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/framework/__init__.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/tensor/__init__.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/tensor/manipulation.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/tensor/math.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/utils/code_gen/api.yaml
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
python/paddle/utils/code_gen/api_gen.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
tools/parallel_UT_rule.py
浏览文件 @
883ee1a3
此差异已折叠。
点击以展开。
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录