BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)

Commit 4be77e53, authored Mar 17, 2022 by phlrain

    Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into move_temporal_shift_to_phi

Parents: 6c7a03bd, 06fee998
Showing 133 changed files with 3,087 additions and 537 deletions (+3087 -537)
.gitignore  +2  -2
paddle/fluid/eager/accumulation/accumulation_node.cc  +4  -4
paddle/fluid/eager/accumulation/accumulation_node.h  +9  -2
paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc  +2  -2
paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h  +9  -2
paddle/fluid/eager/auto_code_generator/eager_generator.cc  +28  -5
paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py  +35  -6
paddle/fluid/eager/backward.cc  +372  -18
paddle/fluid/eager/backward.h  +12  -4
paddle/fluid/eager/custom_operator/custom_operator_node.cc  +2  -2
paddle/fluid/eager/custom_operator/custom_operator_node.h  +8  -2
paddle/fluid/eager/grad_node_info.h  +5  -1
paddle/fluid/eager/grad_tensor_holder.cc  +5  -0
paddle/fluid/eager/grad_tensor_holder.h  +2  -0
paddle/fluid/eager/tensor_wrapper.h  +2  -0
paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc  +53  -0
paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h  +7  -2
paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc  +4  -4
paddle/fluid/eager/tests/task_tests/CMakeLists.txt  +1  -0
paddle/fluid/eager/tests/task_tests/backward_test.cc  +5  -4
paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc  +2  -2
paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc  +8  -8
paddle/fluid/eager/tests/task_tests/generated_test.cc  +3  -3
paddle/fluid/eager/tests/task_tests/grad_test.cc  +339  -0
paddle/fluid/eager/tests/task_tests/hook_test.cc  +2  -2
paddle/fluid/eager/tests/task_tests/hook_test_intermidiate.cc  +3  -3
paddle/fluid/eager/to_static/run_program_op_node.h  +8  -2
paddle/fluid/framework/ir/CMakeLists.txt  +1  -0
paddle/fluid/framework/ir/mixed_precision_configure_pass.cc  +149  -0
paddle/fluid/framework/ir/mixed_precision_configure_pass.h  +39  -0
paddle/fluid/inference/analysis/argument.h  +3  -0
paddle/fluid/inference/analysis/ir_pass_manager.cc  +4  -0
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc  +54  -12
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h  +6  -1
paddle/fluid/inference/api/analysis_config.cc  +33  -0
paddle/fluid/inference/api/analysis_predictor.cc  +5  -0
paddle/fluid/inference/api/analysis_predictor_tester.cc  +26  -0
paddle/fluid/inference/api/paddle_analysis_config.h  +16  -0
paddle/fluid/inference/api/paddle_pass_builder.cc  +34  -0
paddle/fluid/inference/api/paddle_pass_builder.h  +12  -0
paddle/fluid/operators/fake_quantize_op.cu  +32  -31
paddle/fluid/operators/grid_sampler_op.cc  +12  -51
paddle/fluid/pybind/eager_functions.cc  +25  -2
paddle/fluid/pybind/eager_method.cc  +14  -1
paddle/fluid/pybind/eager_properties.cc  +1  -1
paddle/fluid/pybind/eager_utils.cc  +15  -9
paddle/fluid/pybind/eager_utils.h  +2  -1
paddle/fluid/pybind/inference_api.cc  +3  -0
paddle/infrt/CMakeLists.txt  +14  -2
paddle/infrt/backends/host/phi_allocator.h  +21  -0
paddle/infrt/backends/host/phi_context.h  +12  -0
paddle/infrt/backends/tensorrt/test_trt_engine.cc  +17  -18
paddle/infrt/backends/tensorrt/trt_engine.cc  +17  -4
paddle/infrt/backends/tensorrt/trt_engine.h  +9  -2
paddle/infrt/dialect/CMakeLists.txt  +1  -7
paddle/infrt/dialect/dense_tensor.td  +27  -1
paddle/infrt/dialect/infrt/ir/infrt_base.td  +7  -0
paddle/infrt/dialect/infrt/ir/infrt_dialect.cc  +7  -0
paddle/infrt/dialect/infrt/pass/infrt_op_fuse.td  +1  -1
paddle/infrt/dialect/infrt/pass/infrt_op_fuse_pass.cc  +1  -1
paddle/infrt/dialect/init_dialects.cc  +4  -2
paddle/infrt/dialect/pd/CMakeLists.txt  +3  -0
paddle/infrt/dialect/pd/common/CMakeLists.txt  +4  -0
paddle/infrt/dialect/pd/ir/CMakeLists.txt  +7  -0
paddle/infrt/dialect/pd/ir/pd_extra_ops.td  +1  -1
paddle/infrt/dialect/pd/ir/pd_op_base.td  +3  -3
paddle/infrt/dialect/pd/ir/pd_ops.cc  +75  -0
paddle/infrt/dialect/pd/ir/pd_ops.h  +33  -0
paddle/infrt/dialect/pd/pass/CMakeLists.txt  +8  -0
paddle/infrt/dialect/pd/pass/pd_op_fuse.td  +2  -2
paddle/infrt/dialect/pd/pass/pd_op_fuse_pass.cc  +44  -0
paddle/infrt/dialect/pd/pass/pd_op_fuse_pass.h  +24  -0
paddle/infrt/dialect/phi/ir/infrt_phi_tensor.td  +5  -3
paddle/infrt/dialect/phi/pass/phi_op_convert_pass.cc  +40  -32
paddle/infrt/dialect/phi/pass/proto_arg_map_context.h  +1  -1
paddle/infrt/dialect/tensorrt/pd_lower_to_trt.td  +1  -1
paddle/infrt/dialect/tensorrt/trt_graph_fuse_pass.cc  +2  -1
paddle/infrt/dialect/tensorrt/trt_graph_split_pass.cc  +1  -1
paddle/infrt/dialect/tensorrt/trt_op_converter_pass.cc  +1  -1
paddle/infrt/dialect/tensorrt/trt_op_teller_pass.cc  +1  -1
paddle/infrt/dialect/tensorrt/trt_ops.cc  +4  -0
paddle/infrt/dialect/tensorrt/trt_ops.h  +1  -1
paddle/infrt/dialect/tensorrt/trt_ops.td  +30  -11
paddle/infrt/host_context/mlir_exec.cc  +7  -1
paddle/infrt/host_context/mlir_to_runtime_translate.cc  +84  -50
paddle/infrt/host_context/paddle_mlir.cc  +1  -1
paddle/infrt/host_context/paddle_mlir.h  +7  -7
paddle/infrt/host_context/value.h  +29  -1
paddle/infrt/kernel/CMakeLists.txt  +1  -0
paddle/infrt/kernel/phi/context_kernels.cc  +10  -0
paddle/infrt/kernel/phi/context_kernels.h  +4  -0
paddle/infrt/kernel/phi/dense_tensor_kernels.cc  +75  -12
paddle/infrt/kernel/phi/dense_tensor_kernels.h  +7  -0
paddle/infrt/kernel/phi/registry.cc  +10  -1
paddle/infrt/kernel/tensor_kernels.cc  +26  -0
paddle/infrt/kernel/tensorrt/CMakeLists.txt  +10  -0
paddle/infrt/kernel/tensorrt/registry.cc  +33  -0
paddle/infrt/kernel/tensorrt/registry.h  +35  -0
paddle/infrt/kernel/tensorrt/trt_kernels.cc  +172  -0
paddle/infrt/kernel/tensorrt/trt_kernels.h  +49  -0
paddle/infrt/tests/dialect/disabled_trt.mlir  +37  -0
paddle/infrt/tests/dialect/pd/rewrite.mlir  +1  -1
paddle/infrt/tests/dialect/phi/dense_tensor.mlir  +1  -1
paddle/infrt/tests/dialect/phi/phi_test.mlir  +1  -1
paddle/infrt/tests/dialect/trt_ops.mlir  +10  -10
paddle/phi/api/include/tensor.h  +11  -2
paddle/phi/api/lib/CMakeLists.txt  +1  -1
paddle/phi/api/lib/api_gen_utils.cc  +3  -9
paddle/phi/api/lib/data_transform.cc  +3  -6
paddle/phi/api/lib/tensor.cc  +12  -2
paddle/phi/api/lib/tensor_method.cc  +96  -6
paddle/phi/backends/gpu/gpu_context.cc  +4  -0
paddle/phi/backends/gpu/gpu_context.h  +2  -0
paddle/phi/common/CMakeLists.txt  +1  -1
paddle/phi/core/kernel_factory.h  +8  -0
paddle/phi/infermeta/binary.cc  +42  -0
paddle/phi/infermeta/binary.h  +5  -0
paddle/phi/kernels/selected_rows/copy_kernel.cc  +49  -0
paddle/phi/kernels/selected_rows/copy_kernel.h  +31  -0
python/paddle/fluid/dygraph/base.py  +47  -15
python/paddle/fluid/dygraph/io.py  +29  -39
python/paddle/fluid/dygraph/jit.py  +1  -1
python/paddle/fluid/dygraph/layers.py  +2  -1
python/paddle/fluid/tests/unittests/CMakeLists.txt  +2  -2
python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py  +8  -0
python/paddle/fluid/tests/unittests/parallel_dygraph_dataparallel_in_eager_mode.py  +17  -3
python/paddle/fluid/tests/unittests/test_egr_python_api.py  +1  -1
python/paddle/fluid/tests/unittests/test_imperative_double_grad.py  +183  -31
python/paddle/fluid/tests/unittests/test_paddle_imperative_double_grad.py  +67  -26
python/paddle/static/input.py  +1  -1
python/paddle/utils/code_gen/api_base.py  +1  -1
tools/infrt/custom_pdop.td  +0  -10
tools/infrt/generate_pd_op_dialect_from_paddle_op_maker.py  +3  -7
.gitignore

@@ -52,12 +52,12 @@ tools/__pycache__
 # This file is automatically generated.
 # TODO(zhiqiang) Move this file to build directory.
-paddle/infrt/dialect/pd_ops.td
+paddle/infrt/dialect/pd/ir/pd_ops.td
 paddle/infrt/dialect/phi/ir/phi_cpu_kernels.td
 paddle/infrt/dialect/phi/ir/phi_gpu_kernels.td
 tools/infrt/kernels.json
 tools/infrt/kernel_signature.json
-paddle/infrt/dialect/pd_ops_info.h
+paddle/infrt/dialect/pd/common/pd_ops_info.h
 .lit_test_times.txt
 paddle/infrt/tests/dialect/Output
 paddle/infrt/tests/lit.cfg.py
paddle/fluid/eager/accumulation/accumulation_node.cc

@@ -24,7 +24,7 @@
 #include "paddle/fluid/platform/errors.h"
 #include "glog/logging.h"

+DECLARE_bool(retain_grad_for_all_tensor);
 namespace egr {

 static void CopyOrAddTensor(paddle::experimental::Tensor* tensor,
@@ -39,8 +39,8 @@ static void CopyOrAddTensor(paddle::experimental::Tensor* tensor,
 }

 std::vector<std::vector<paddle::experimental::Tensor>> GradNodeAccumulation::
 operator()(
-    const std::vector<std::vector<paddle::experimental::Tensor>>& grads) {
+    const std::vector<std::vector<paddle::experimental::Tensor>>& grads,
+    bool create_graph) {
   VLOG(3) << "Running Eager Backward Node: GradNodeAccumulation";
   PADDLE_ENFORCE(grads.size() == 1,
                  paddle::platform::errors::Fatal(
@@ -62,7 +62,7 @@ operator()(
     grad_out = grads[0][0];
   }

-  if (!weak_grad_.expired()) {
+  if (!weak_grad_.expired() && FLAGS_retain_grad_for_all_tensor) {
     auto grad = weak_grad_.lock();
     CopyOrAddTensor(grad.get(), grad_out);
   }
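Note on the flag used above: DECLARE_bool(retain_grad_for_all_tensor) only references a gflags flag that must be defined exactly once elsewhere in Paddle. A minimal, self-contained sketch of that pairing; ShouldAccumulateLeafGrad is a made-up helper that just mirrors the guarded branch added to GradNodeAccumulation::operator():

#include "gflags/gflags.h"

// In this sketch the flag is defined locally; in Paddle it is defined once in
// a central flags file and other translation units reference it via
// DECLARE_bool(retain_grad_for_all_tensor).
DEFINE_bool(retain_grad_for_all_tensor, true,
            "Whether leaf tensors keep accumulating their gradient");

// Mirrors the condition added to GradNodeAccumulation::operator():
// the leaf gradient is only written back when the flag is on.
bool ShouldAccumulateLeafGrad(bool weak_grad_alive) {
  return weak_grad_alive && FLAGS_retain_grad_for_all_tensor;
}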
paddle/fluid/eager/accumulation/accumulation_node.h

@@ -35,8 +35,15 @@ class GradNodeAccumulation : public GradNodeBase {
   // Functor: perform backward computations
   virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
-      const std::vector<std::vector<paddle::experimental::Tensor>>& grads)
-      override;
+      const std::vector<std::vector<paddle::experimental::Tensor>>& grads,
+      bool create_graph = false) override;
+
+  void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; }
+
+  bool IsTensorWrappersCleared() override {
+    VLOG(6) << "Do nothing here now";
+    return false;
+  }

   std::string name() { return "GradNodeAccumulation"; }
paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc

@@ -145,8 +145,8 @@ void GradNodeScale::SetTensorWrappers_X(
 void GradNodeScale::SetAttributes_scale(float scale) { scale_ = scale; }

 std::vector<std::vector<paddle::experimental::Tensor>> GradNodeScale::
 operator()(
-    const std::vector<std::vector<paddle::experimental::Tensor>>& grads) {
+    const std::vector<std::vector<paddle::experimental::Tensor>>& grads,
+    bool create_graph) {
   // 1. Check Output Size
   PADDLE_ENFORCE(
       ((grads.size() == 1) && (grads[0].size() == 1)),
paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h

@@ -39,8 +39,15 @@ class GradNodeScale : public GradNodeBase {
   // Functor: perform backward computations
   virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
-      const std::vector<std::vector<paddle::experimental::Tensor>>& grads)
-      override;
+      const std::vector<std::vector<paddle::experimental::Tensor>>& grads,
+      bool create_graph = false) override;
+
+  void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; }
+
+  bool IsTensorWrappersCleared() override {
+    VLOG(6) << "Do nothing here now";
+    return false;
+  }

   void SetTensorWrappers_X(
       const std::vector<paddle::experimental::Tensor>& tensors);
paddle/fluid/eager/auto_code_generator/eager_generator.cc

@@ -2074,7 +2074,8 @@ static std::string GenerateGradNodeCCContents(
   const char* GRAD_FUNCTION_TEMPLATE =
       "std::vector<std::vector<paddle::experimental::Tensor>> "
       "GradNode%s::operator()(const "
-      "std::vector<std::vector<paddle::experimental::Tensor>>& grads) {\n%s\n}";
+      "std::vector<std::vector<paddle::experimental::Tensor>>& grads, "
+      "bool create_graph) {\n%s\n}";
   std::string grad_function_str = paddle::string::Sprintf(
       GRAD_FUNCTION_TEMPLATE, fwd_op_type, generated_grad_function_body);
@@ -2109,18 +2110,28 @@ static std::string GenerateGradNodeHeaderContents(
       "\n"
       "  virtual std::vector<std::vector<paddle::experimental::Tensor>> "
       "operator()(const "
-      "std::vector<std::vector<paddle::experimental::Tensor>>& grads) "
+      "std::vector<std::vector<paddle::experimental::Tensor>>& grads, const "
+      "bool create_graph = false) "
       "override;\n"
       "\n"
+      "  void ClearTensorWrappers() override {\n"
+      "%s\n"
+      "    is_tensor_wrappers_cleared = true;\n"
+      "  }\n"
       "  std::string name() override { return \"GradNode%s\"; }\n"
       "\n"
       "  // SetX, SetY, ...\n"
       "%s\n"
       "  // SetAttrMap\n"
       "%s\n"
+      "  bool IsTensorWrappersCleared() override {\n"
+      "    return is_tensor_wrappers_cleared;\n"
+      "  }\n"
       " private:\n"
       "   // TensorWrappers\n"
       "%s\n"
+      "   bool is_tensor_wrappers_cleared = false;\n"
+      "\n"
       "   // Attribute Map\n"
       "%s\n"
       "};";
@@ -2154,6 +2165,7 @@ static std::string GenerateGradNodeHeaderContents(
   std::string set_tensor_wrappers_str = "";
   std::string tensor_wrapper_members_str = "";
+  std::string clear_tensor_wrappers_str = "";
   for (const auto& iter : op_base_infos) {
     const std::map<std::string, std::string>& grad_ins_fwd_slotname_map =
         iter.GetGradInsFwdSlotnameMap();
@@ -2185,6 +2197,13 @@ static std::string GenerateGradNodeHeaderContents(
           SET_TENSOR_WRAPPER_BODY_TEMPLATE, tensor_wrapper_name,
           struct_tensor_wrapper_name);
+
+      const char* CLEAR_TENSOR_WRAPPER_TEMPLATE =
+          "for (auto tw: %s) {\n"
+          "   tw.clear();\n"
+          " }\n";
+      clear_tensor_wrappers_str += paddle::string::Sprintf(
+          CLEAR_TENSOR_WRAPPER_TEMPLATE, struct_tensor_wrapper_name);
     } else {
       const char* ATTR_TENSOR_WRAPPER_ARG_TEMPLATE =
           "const paddle::experimental::Tensor& %s";
@@ -2197,10 +2216,14 @@ static std::string GenerateGradNodeHeaderContents(
           TENSOR_WRAPPER_MEMBER_TEMPLATE, struct_tensor_wrapper_name);
       const char* SET_TENSOR_WRAPPER_BODY_TEMPLATE =
-          "%s = egr::TensorWrapper(%s, %s /*full_reserved*/);";
+          "%s = egr::TensorWrapper(%s, %s /*full_reserved*/);\n";
       tensor_wrapper_body_str = paddle::string::Sprintf(
           SET_TENSOR_WRAPPER_BODY_TEMPLATE, struct_tensor_wrapper_name,
           tensor_wrapper_name, full_reserved_str);
+
+      const char* CLEAR_TENSOR_WRAPPER_TEMPLATE = "   %s.clear();\n";
+      clear_tensor_wrappers_str += paddle::string::Sprintf(
+          CLEAR_TENSOR_WRAPPER_TEMPLATE, struct_tensor_wrapper_name);
     }
   }
   std::string full_reserved_signature_str = "bool full_reserved";
   const char* SET_TENSOR_WRAPPER_TEMPLATE =
@@ -2215,8 +2238,8 @@ static std::string GenerateGradNodeHeaderContents(
   std::string grad_node_str = paddle::string::Sprintf(
-      GRAD_NODE_TEMPLATE, op_type, op_type, op_type, op_type, op_type, op_type,
-      set_tensor_wrappers_str, set_attr_map_str, tensor_wrapper_members_str,
-      attr_members_str);
+      GRAD_NODE_TEMPLATE, op_type, op_type, op_type, op_type, op_type, op_type,
+      op_type, op_type, op_type, clear_tensor_wrappers_str, op_type,
+      set_tensor_wrappers_str, set_attr_map_str, tensor_wrapper_members_str,
+      attr_members_str);
   return grad_node_str;
 }
paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py

@@ -478,6 +478,7 @@ def GenerateNodeDeclaration(fwd_api_name, backward_fwd_input_map,
     # SetTensorWrapper Methods & TensorWrapper Members
     set_tensor_wrapper_methods_str = ""
     tensor_wrapper_members_str = ""
+    clear_tensor_wrapper_str = ""
     for tname, (ttype, is_fwd_input, _) in backward_fwd_input_map.items():
         if tname in no_need_buffer_set:
             no_need_buffer = "true"
@@ -499,6 +500,13 @@ def GenerateNodeDeclaration(fwd_api_name, backward_fwd_input_map,
   """
             tensor_wrapper_members_str += PLAIN_TENSOR_MEMBER_TEMPLATE.format(
                 tensor_wrapper_name)
+
+            CLEAR_TENSOR_WRAPPERS_TEMPLATE = """
+   {}.clear();
+  """
+            clear_tensor_wrapper_str += CLEAR_TENSOR_WRAPPERS_TEMPLATE.format(
+                tensor_wrapper_name)
         else:
             assert IsVectorTensorType(ttype)
             SET_VECTOR_TENSOR_WRAPPER_TEMPLATE = """
@@ -516,6 +524,15 @@ def GenerateNodeDeclaration(fwd_api_name, backward_fwd_input_map,
   """
             tensor_wrapper_members_str += VECTOR_TENSOR_MEMBER_TEMPLATE.format(
                 tensor_wrapper_name)
+
+            CLEAR_TENSOR_WRAPPERS_TEMPLATE = """
+   for (auto tw: {}) {
+     tw.clear();
+   };
+  """
+            clear_tensor_wrapper_str += CLEAR_TENSOR_WRAPPERS_TEMPLATE.format(
+                tensor_wrapper_name)
     # End: SetTensorWrapper Methods & TensorWrapper Members

     # SetAttributes & Attribute Members
@@ -524,7 +541,7 @@ def GenerateNodeDeclaration(fwd_api_name, backward_fwd_input_map,
     for aname, atype, default_val, _ in backward_attrs_list:
         saved_attr_name = GetSavedName(aname)
         SET_ATTR_METHOD_TEMPLATE = """
   void SetAttribute{}({} {}) {{
     {} = {};
   }}
  """
@@ -555,25 +572,37 @@ class {} : public egr::GradNodeBase {{
  ~{}() override = default;

  virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
-      const std::vector<std::vector<paddle::experimental::Tensor>>& grads) override;
+      const std::vector<std::vector<paddle::experimental::Tensor>>& grads, bool create_graph = false) override;

  std::string name() override {{ return \"{}\"; }}

+  void ClearTensorWrappers() override {{
+      {}
+      is_tensor_wrappers_cleared = true;
+  }}
+
  // SetTensorWrapperX, SetTensorWrapperY, ...
  {}

  // SetAttributes
  {}

+  bool IsTensorWrappersCleared() override {{
+      return is_tensor_wrappers_cleared;
+  }}
+
 private:
  // TensorWrappers
  {}

+  bool is_tensor_wrappers_cleared = false;
+
  // Attributes
  {}
}};
"""
     node_declaration_str = NODE_DECLARATION_TEMPLATE.format(
         grad_node_name, grad_node_name, grad_node_name, grad_node_name,
-        grad_node_name, set_tensor_wrapper_methods_str,
-        set_attribute_methods_str, tensor_wrapper_members_str,
-        attribute_members_str)
+        grad_node_name, clear_tensor_wrapper_str,
+        set_tensor_wrapper_methods_str, set_attribute_methods_str,
+        tensor_wrapper_members_str, attribute_members_str)

     return node_declaration_str
@@ -637,7 +666,7 @@ def GenerateNodeDefinition(fwd_api_name, bwd_api_name, backward_fwd_input_map,
     grad_api_namespace = f"paddle::experimental"

     FUNCTION_TEMPLATE = """
-std::vector<std::vector<paddle::experimental::Tensor>> {}::operator()(const std::vector<std::vector<paddle::experimental::Tensor>>& grads) {{
+std::vector<std::vector<paddle::experimental::Tensor>> {}::operator()(const std::vector<std::vector<paddle::experimental::Tensor>>& grads, bool create_graph) {{
    // Call grad_api function
    auto grad_api_returns = {}::{}({});
    {}
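For orientation, the updated NODE_DECLARATION_TEMPLATE above (and the equivalent template in eager_generator.cc) expands to grad-node declarations of roughly this shape. GradNodeMatmul and the X_/Y_ members are placeholder names standing in for whatever operator is being generated; this is not literal generator output:

class GradNodeMatmul : public egr::GradNodeBase {
 public:
  ~GradNodeMatmul() override = default;

  // New signature: the engine passes create_graph so double-grad can be built.
  virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
      const std::vector<std::vector<paddle::experimental::Tensor>>& grads,
      bool create_graph = false) override;

  std::string name() override { return "GradNodeMatmul"; }

  // Releases the saved forward tensors once they are no longer needed.
  void ClearTensorWrappers() override {
    X_.clear();
    Y_.clear();
    is_tensor_wrappers_cleared = true;
  }

  bool IsTensorWrappersCleared() override { return is_tensor_wrappers_cleared; }

 private:
  // TensorWrappers
  egr::TensorWrapper X_;
  egr::TensorWrapper Y_;
  bool is_tensor_wrappers_cleared = false;
};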
paddle/fluid/eager/backward.cc

@@ -39,12 +39,21 @@ std::unordered_map<GradNodeBase*, int> getInDegreeMap(
   // Copy nodes
   std::queue<GradNodeBase*> queue = init_queue;
   std::unordered_set<GradNodeBase*> visited;
+  size_t potential_startup_ops_cnt = queue.size();
+  size_t cnt = 0;

   // Visit each node exactly once in any order
   while (!queue.empty()) {
     GradNodeBase* node = queue.front();
     queue.pop();
+    if (cnt < potential_startup_ops_cnt) {
+      if (!node_in_degree_map.count(node)) {
+        node_in_degree_map[node] = 0;
+      }
+      cnt += 1;
+    }
     if (visited.count(node)) {
       continue;
     }
@@ -76,23 +85,248 @@ std::unordered_map<GradNodeBase*, int> getInDegreeMap(
   return node_in_degree_map;
 }

-void RunBackward(const std::vector<paddle::experimental::Tensor>& tensors,
-                 const std::vector<paddle::experimental::Tensor>& grad_tensors,
-                 bool retain_graph) {
-  paddle::platform::RecordEvent backward_record_event(
-      "backward", paddle::platform::TracerEventType::Operator, 1);
+// Remove some nodes those doesn't need to be
+// stored in potential_stop_nodes、potential_startup_nodes
+void UpdateGraphInfo(
+    std::unordered_map<GradNodeBase*, AutogradMeta*>*
+        target_nodes_inputmeta_map,
+    std::unordered_map<GradNodeBase*, std::unordered_set<GradNodeBase*>>*
+        depending_nodes,
+    std::unordered_set<GradNodeBase*>* potential_stop_nodes,
+    std::unordered_set<GradNodeBase*>* potential_startup_nodes) {
+  // Updated potential_sotp_nodes by depending_nodes,
+  // make sure the path from root to target_node is ok
+  std::unordered_set<GradNodeBase*> _startup_ops;
+  VLOG(6) << "Running in UpdateGraphInfo";
+  std::queue<GradNodeBase*> queue;
+  for (auto& target_nodes_inputmeta_pair : *target_nodes_inputmeta_map) {
+    queue.emplace(target_nodes_inputmeta_pair.first);
+  }
+
+  while (!queue.empty()) {
+    auto* target_node = queue.front();
+    queue.pop();
+    if (!(*depending_nodes)[target_node].empty()) {
+      auto precedding_nodes = (*depending_nodes)[target_node];
+      for (auto pre_nodes : precedding_nodes) {
+        queue.emplace(pre_nodes);
+        if (potential_stop_nodes->find(pre_nodes) !=
+            potential_stop_nodes->end()) {
+          potential_stop_nodes->erase(pre_nodes);
+        }
+      }
+    } else {  // startup_ops have no precedding nodes
+      VLOG(6) << "Emplace _startup_ops";
+      _startup_ops.emplace(target_node);
+    }
+  }
+  // Purify potential_startup_nodes again, remove some
+  // potential startup_nodes that unreach to input target nodes
+  if (!_startup_ops.empty()) {
+    std::unordered_set<GradNodeBase*> potential_startup_nodes_to_be_erased;
+    for (auto node : *potential_startup_nodes) {
+      if (_startup_ops.count(node) == 0) {
+        VLOG(6) << "Set up potential_startup_nodes_to_be_erased";
+        potential_startup_nodes_to_be_erased.emplace(node);
+      }
+    }
+    if (!potential_startup_nodes_to_be_erased.empty()) {
+      for (auto node : potential_startup_nodes_to_be_erased) {
+        VLOG(6) << "Erase nodes in potential_startup_nodes_to_be_erased";
+        potential_startup_nodes->erase(node);
+      }
+    }
+  }
+}
+
+// Get Graph Info Betweent input target gradnode and outputs,
+// record depending_nodes、 potential_stop_nodes、potential_startup_nodes
+void GetGraphInfoBetweenTargets(
+    const std::queue<GradNodeBase*>& init_queue,
+    std::unordered_map<GradNodeBase*, AutogradMeta*>*
+        input_target_nodes_inputmeta_map,
+    std::unordered_map</*child node*/ GradNodeBase*,
+                       /*father nodes*/ std::unordered_set<GradNodeBase*>>*
+        depending_nodes,
+    std::unordered_set<GradNodeBase*>* potential_stop_nodes,
+    std::unordered_set<GradNodeBase*>* potential_startup_nodes) {
+  if (input_target_nodes_inputmeta_map->empty()) return;
+
+  VLOG(6) << "Runing In GetGraphInfoBetweenTargets";
+
+  // Calculate in_degree for each node
+  std::unordered_map<GradNodeBase*, int> node_in_degree_map;
+
+  // Copy nodes
+  std::queue<GradNodeBase*> queue = init_queue;
+  std::unordered_set<GradNodeBase*> visited;
+
+  // Visit each node exactly once in any order
+  while (!queue.empty()) {
+    GradNodeBase* node = queue.front();
+    queue.pop();
+
+    if (visited.count(node)) {
+      continue;
+    }
+    visited.insert(node);
+
+    // Check node is target_nodes or not, if node is not target_node,
+    // all the next_node will be marked in potential_stop_nodes
+    bool is_potential_stop_nodes =
+        input_target_nodes_inputmeta_map->count(node);
+
+    // Find and append next nodes
+    const std::vector<std::vector<Edge>>& edges = node->GetEdges();
+    for (const auto& edge_list : edges) {
+      for (const Edge& edge : edge_list) {
+        GradNodeBase* next_node = edge.GetMutableGradNode().get();
+
+        // Next node could be nullptr if it is leaf tensor with no
+        // AccumulationNode attached
+        // Or it could also originated from dispensable inputs
+        if (!next_node) continue;
+
+        // if node not in input_target_nodes,
+        // all the next_nodes of current node will be inserted to
+        // potential_stop_node
+        if (is_potential_stop_nodes) {
+          potential_stop_nodes->emplace(next_node);
+        }
+
+        // Update in_degree
+        if (!node_in_degree_map.count(next_node))
+          node_in_degree_map[next_node] = 0;
+        node_in_degree_map[next_node]++;
+
+        // Record depending relationship
+        (*depending_nodes)[next_node].emplace(node);
+        queue.push(next_node);
+      }
+    }
+  }
+  // Update Graph Info, remove some stop_node in potential_stop_nodes
+  UpdateGraphInfo(input_target_nodes_inputmeta_map, depending_nodes,
+                  potential_stop_nodes, potential_startup_nodes);
+}
+
+void GetTargetNodesInfo(const std::vector<paddle::experimental::Tensor>& inputs,
+                        std::unordered_map<GradNodeBase*, AutogradMeta*>*
+                            target_nodes_inputmeta_map) {
+  VLOG(6) << "Running in GetTargetNodesInfo";
+  if (!inputs.empty()) {
+    VLOG(6) << "Inputs are not empty";
+    size_t num_inputs = inputs.size();
+    for (size_t i = 0; i < num_inputs; i++) {
+      AutogradMeta* auto_grad_meta =
+          EagerUtils::unsafe_autograd_meta(inputs[i]);
+      auto target_node = auto_grad_meta->GetMutableGradNode().get();
+      PADDLE_ENFORCE_NOT_NULL(target_node,
+                              paddle::platform::errors::Fatal(
+                                  "There is no grad op for input:%d or it's"
+                                  "stop_gradient=True",
+                                  i));
+      (*target_nodes_inputmeta_map)[target_node] = auto_grad_meta;
+    }
+  }
+}
+
+std::vector<paddle::experimental::Tensor> GetResults(
+    const std::vector<paddle::experimental::Tensor>& inputs,
+    std::unordered_map<GradNodeBase*, paddle::experimental::Tensor>*
+        results_map,
+    bool allow_unused, bool create_graph) {
+  VLOG(6) << "Running in GetResults";
+  if (inputs.empty()) return {};
+
+  std::vector<paddle::experimental::Tensor> results;
+  results.reserve(inputs.size());
+
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    auto& input = inputs[i];
+    AutogradMeta* auto_grad_meta = EagerUtils::unsafe_autograd_meta(input);
+    auto target_node = auto_grad_meta->GetMutableGradNode().get();
+
+    auto iter = results_map->find(target_node);
+    if (iter != results_map->end()) {
+      // set StopGradient = !create_graph
+      AutogradMeta* tensor_auto_grad_meta =
+          EagerUtils::autograd_meta(&(iter->second));
+      tensor_auto_grad_meta->SetStopGradient(!create_graph);
+      results.emplace_back(iter->second);
+    } else {
+      PADDLE_ENFORCE_EQ(allow_unused, true,
+                        paddle::platform::errors::InvalidArgument(
+                            "The %d-th input does not appear in the backward "
+                            "graph. Please check the input variable or set "
+                            "allow_unused=True to get None result.",
+                            i));
+      results.emplace_back();
+    }
+  }
+  return results;
+}
+
+// Enforce GradNode has TensorWrappers as Input
+void EnforceGradNodeHasInput(GradNodeBase* node) {
+  VLOG(6) << "Running in EnforceGradNodeHasInput";
+  PADDLE_ENFORCE_NE(
+      node->IsTensorWrappersCleared(), true,
+      paddle::platform::errors::Fatal(
+          "The TensorWrappers of %s do not exist. This may be because:\n"
+          "You calculate backward twice for the same subgraph without "
+          "setting retain_graph=True. Please set retain_graph=True in the "
+          "first backward/grad call.\n",
+          node->name()));
+}
+
+// Purify potential_startup_nodes, remove nodes those are the same as
+// input_target_nodes
+void PurifyPotentialStartUpNodes(
+    std::unordered_set<GradNodeBase*>* potential_startup_nodes,
+    std::unordered_map<GradNodeBase*, AutogradMeta* /* InputMeta */>*
+        input_target_nodes_inputmeta_map) {
+  VLOG(6) << "Running in PurifyPotentialStartUpNodes";
+  if (input_target_nodes_inputmeta_map->empty()) return;
+  std::unordered_set<GradNodeBase*> potential_startup_nodes_to_be_erased;
+  for (auto startup_op : *potential_startup_nodes) {
+    auto iter = input_target_nodes_inputmeta_map->find(startup_op);
+    if (iter != input_target_nodes_inputmeta_map->end()) {
+      potential_startup_nodes_to_be_erased.emplace(iter->first);
+    }
+  }
+  if (!potential_startup_nodes_to_be_erased.empty()) {
+    for (auto nodes : potential_startup_nodes_to_be_erased) {
+      potential_startup_nodes->erase(nodes);
+    }
+  }
+}
+
+std::vector<paddle::experimental::Tensor> RunBackward(
+    const std::vector<paddle::experimental::Tensor>& tensors,  // output
+    const std::vector<paddle::experimental::Tensor>& grad_tensors,
+    bool retain_graph, bool create_graph = false,
+    const std::vector<paddle::experimental::Tensor>& inputs = {},
+    bool allow_unused = false,
+    const std::vector<paddle::experimental::Tensor>& no_grad_vars = {}) {
   VLOG(6) << "Start Backward";
+
   // *Gradient Hook should happen at node-level
   // *Inplace version check should perform at node-level
   // *Cross-batch accumulation happens at forward pass

+  std::unordered_map<GradNodeBase*, AutogradMeta*>
+      no_grad_var_nodes_inputmeta_map;
+  // Get no_grad_vars's GradNodes and InputMeta Info
+  GetTargetNodesInfo(no_grad_vars, &no_grad_var_nodes_inputmeta_map);
+
   /* --- Initialization --- */
   // 1. Init queue with starting nodes
   // 2. Prepare initial input buffers
   std::queue<GradNodeBase*> queue;
   std::unordered_map<GradNodeBase*, std::unique_ptr<GradTensorHolder>>
       node_input_buffers_dict;
+  std::unordered_set<GradNodeBase*> potential_startup_nodes;
   for (size_t i = 0; i < tensors.size(); i++) {
     const paddle::experimental::Tensor& tensor = tensors[i];
@@ -132,8 +366,17 @@ void RunBackward(const std::vector<paddle::experimental::Tensor>& tensors,
                          "size = 0 or same size as tensors"));
       // Feed given tensor if it's provided
       VLOG(6) << "Fill grad input tensor " << i << "with give grad tensor";

-      node_input_buffers_dict[grad_node]->add(
-          input_info.first, input_info.second, grad_tensors[i]);
+      if (grad_tensors[i].is_initialized()) {
+        // Deep copy
+        paddle::experimental::Tensor tmp_tensor;
+        tmp_tensor.copy_(grad_tensors[i], true);
+        node_input_buffers_dict[grad_node]->add(input_info.first,
+                                                input_info.second, tmp_tensor);
+      } else {
+        node_input_buffers_dict[grad_node]->add(
+            input_info.first, input_info.second, grad_tensors[i]);
+      }
     } else {
       VLOG(6) << "Fill grad input tensor " << i << " with 1.0";
@@ -146,8 +389,9 @@ void RunBackward(const std::vector<paddle::experimental::Tensor>& tensors,
           input_info.first, input_info.second, tensor, true /*fill_one=true*/);
     }

-    // Prepare queue
+    // Prepare queue, potential startup_nodes
     queue.push(grad_node);
+    potential_startup_nodes.emplace(grad_node);
   }

   VLOG(6) << "Update In degree Map for backward";
@@ -155,25 +399,74 @@ void RunBackward(const std::vector<paddle::experimental::Tensor>& tensors,
   std::unordered_map<GradNodeBase*, int> node_in_degree_map =
       getInDegreeMap(queue);

+  // Get input's GradNodes and InputMeta Info
+  std::unordered_map<GradNodeBase*, AutogradMeta* /* InputMeta */>
+      input_target_nodes_inputmeta_map;
+  GetTargetNodesInfo(inputs, &input_target_nodes_inputmeta_map);
+
+  // Purify potential_startup_ops, remove those nodes that are the same as
+  // input_target_nodes
+  PurifyPotentialStartUpNodes(&potential_startup_nodes,
+                              &input_target_nodes_inputmeta_map);
+
+  // Get Graph Info Betweent input target gradnode and outputs
+  // Record the depending_nodes and potential_stop_nodes
+  std::unordered_map<GradNodeBase* /* child node */,
+                     std::unordered_set<GradNodeBase*> /* father node */>
+      depending_nodes;
+  std::unordered_set<GradNodeBase*> potential_stop_nodes;
+  // std::unordered_set<GradNodeBase*> startup_ops;
+
+  GetGraphInfoBetweenTargets(queue, &input_target_nodes_inputmeta_map,
+                             &depending_nodes, &potential_stop_nodes,
+                             &potential_startup_nodes);
+
+  // ready_queue store all startup nodes
+  std::queue<GradNodeBase*> ready_queue;
+  // startup op's indegree should be 0
+  for (auto node : potential_startup_nodes) {
+    if (node_in_degree_map[node] == 0) {
+      ready_queue.emplace(node);
+    }
+  }
+
+  VLOG(1) << " startup_ops' size is :" << ready_queue.size();
+
+  std::unordered_map<GradNodeBase*, paddle::experimental::Tensor> results_map;
+
+  // read_queue is empty only when 1.input equals to output. 2.input can not
+  // reach to output.
+  if (ready_queue.size() == 0) {
+    for (auto input_target_node : input_target_nodes_inputmeta_map) {
+      // out rank_info of forward op
+      auto rank_info = input_target_node.second->OutRankInfo();
+      if (node_input_buffers_dict[input_target_node.first]) {
+        auto& target_result =
+            node_input_buffers_dict[input_target_node.first]
+                ->Buffers()[rank_info.first][rank_info.second];
+        // save the target result
+        results_map[input_target_node.first] = target_result;
+      }
+    }
+  }
+
   /* --- Topological Visit --- */
   // 1. Pop queue
   // 2. Run node
+  //    |- Check and capture target result
   //    |- node(grads)
   //    |- Prepare for next node
   // 3. Update queue
   VLOG(6) << "Run Backward";
-  while (!queue.empty()) {
-    GradNodeBase* node = queue.front();
+  while (!ready_queue.empty()) {
+    GradNodeBase* node = ready_queue.front();
+    VLOG(6) << "Running GradNode:" << node->name();
+    ready_queue.pop();

     paddle::platform::RecordEvent node_record_event(
         std::string(typeid(*node).name()) + " grad_node",
         paddle::platform::TracerEventType::Operator, 1);

-    if (queue.size() > 1 && node_in_degree_map[node] != 0) {
-      queue.pop();
-      continue;
-    }
-    queue.pop();
-
     // Run node: This is where Hook happens
     PADDLE_ENFORCE(
         node_input_buffers_dict.count(node),
@@ -184,10 +477,45 @@ void RunBackward(const std::vector<paddle::experimental::Tensor>& tensors,
     std::unique_ptr<GradTensorHolder> node_input_buffer =
         std::move(node_input_buffers_dict[node]);

+    // get target grad_var from node_input_buffer by inputmeta
+    if (input_target_nodes_inputmeta_map.find(node) !=
+        input_target_nodes_inputmeta_map.end()) {
+      VLOG(6) << "Get target result by by inputmeta";
+      // out rank_info of forward op
+      auto rank_info = input_target_nodes_inputmeta_map[node]->OutRankInfo();
+      // rank_info is a pair, first means slot_id, second means rank.
+      auto& target_result =
+          node_input_buffer->Buffers()[rank_info.first][rank_info.second];
+      // save the target result
+      results_map[node] = target_result;
+    }
+
+    // no_grad_vars
+    if (no_grad_var_nodes_inputmeta_map.find(node) !=
+        no_grad_var_nodes_inputmeta_map.end()) {
+      VLOG(6) << "Change the input buffer[slot][rank] by Zeros";
+      auto rank_info = no_grad_var_nodes_inputmeta_map[node]->OutRankInfo();
+      node_input_buffer->SetBufferSlotRankZeros(rank_info.first,
+                                                rank_info.second);
+    }
+
+    VLOG(6) << "Running GradNode:" << node->name();
+
+    // check input
+    EnforceGradNodeHasInput(node);
+
     VLOG(6) << "Run Backward Kernel with GradTensorHolder";
     // Run Pre Backward Node and get outputs
     std::vector<std::vector<paddle::experimental::Tensor>> grad_output_tensors =
-        (*node)(node_input_buffer->Buffers());
+        (*node)(node_input_buffer->Buffers(), create_graph);
+
+    // retain_grad or not
+    if (!retain_graph) {
+      VLOG(6)
+          << "retain_graph is false, need to clear the TensorWrapper of nodes.";
+      node->ClearTensorWrappers();
+    }

     // TODO(jiabin): Should we erase it or find a more efficient way.
     node_input_buffers_dict.erase(node);
@@ -252,18 +580,44 @@ void RunBackward(const std::vector<paddle::experimental::Tensor>& tensors,
       // Update queue
       node_in_degree_map[next_node]--;
       PADDLE_ENFORCE(
           node_in_degree_map[next_node] >= 0,
           paddle::platform::errors::Fatal(
               "Detected in-degree value smaller than zero. For Node: %s"
               "Node's in-degree cannot be negative",
               next_node->name()));

-      if (node_in_degree_map[next_node] == 0) {
-        queue.emplace(std::move(next_node));
+      bool is_potential_stop_node = potential_stop_nodes.count(next_node);
+      if (node_in_degree_map[next_node] == 0 && !is_potential_stop_node) {
+        ready_queue.emplace(std::move(next_node));
       }
     }
   }
+
+  return GetResults(inputs, &results_map, allow_unused, create_graph);
+}
+
+void Backward(
+    const std::vector<paddle::experimental::Tensor>& tensors,  // output
+    const std::vector<paddle::experimental::Tensor>& grad_tensors,
+    bool retain_graph) {
+  VLOG(6) << "Run in Backward";
+  paddle::platform::RecordEvent backward_record_event(
+      "backward", paddle::platform::TracerEventType::Operator, 1);
+  RunBackward(tensors, grad_tensors, retain_graph);
+}
+
+std::vector<paddle::experimental::Tensor> Grad(
+    const std::vector<paddle::experimental::Tensor>& tensors,  // output
+    const std::vector<paddle::experimental::Tensor>& inputs,
+    const std::vector<paddle::experimental::Tensor>& grad_tensors,
+    bool retain_graph, bool create_graph, bool only_inputs, bool allow_unused,
+    const std::vector<paddle::experimental::Tensor>& no_grad_vars) {
+  VLOG(6) << "Run in Grad";
+  return RunBackward(tensors, grad_tensors, retain_graph, create_graph, inputs,
+                     allow_unused, no_grad_vars);
 }

 }  // namespace egr
paddle/fluid/eager/backward.h

@@ -19,12 +19,20 @@
 namespace egr {

-// run_backward():
+// Backward():
 //  tensors corresponds to those lived in the backward graph
 //  each grad_tensors[i] keeps the value for its corresponding tensors[i]
-void RunBackward(const std::vector<paddle::experimental::Tensor>& tensors,
-                 const std::vector<paddle::experimental::Tensor>& grad_tensors,
-                 bool retain_graph = false);
+void Backward(const std::vector<paddle::experimental::Tensor>& tensors,
+              const std::vector<paddle::experimental::Tensor>& grad_tensors,
+              bool retain_graph = false);
+
+std::vector<paddle::experimental::Tensor> Grad(
+    const std::vector<paddle::experimental::Tensor>& tensors,
+    const std::vector<paddle::experimental::Tensor>& inputs,
+    const std::vector<paddle::experimental::Tensor>& grad_tensors = {},
+    bool retain_graph = false, bool create_graph = false,
+    bool only_inputs = false, bool allow_unused = false,
+    const std::vector<paddle::experimental::Tensor>& no_grad_vars = {});

 // Reserved for gradient()
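A minimal usage sketch of the two entry points declared above. The loss/x tensors and the wrapper function are hypothetical; only the Backward/Grad signatures come from this header:

#include "paddle/fluid/eager/backward.h"

void BackwardAndGradSketch(const paddle::experimental::Tensor& loss,
                           const paddle::experimental::Tensor& x) {
  // Accumulate gradients into leaf tensors, as the old RunBackward() did.
  // retain_graph=true keeps TensorWrappers alive for the second call below.
  egr::Backward({loss}, /*grad_tensors=*/{}, /*retain_graph=*/true);

  // Return d(loss)/d(x) directly; create_graph=true keeps recording the
  // backward ops so a second-order gradient can be taken from the result.
  std::vector<paddle::experimental::Tensor> dx =
      egr::Grad({loss}, /*inputs=*/{x}, /*grad_tensors=*/{},
                /*retain_graph=*/false, /*create_graph=*/true);
}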
paddle/fluid/eager/custom_operator/custom_operator_node.cc

@@ -20,8 +20,8 @@
 namespace egr {
 std::vector<std::vector<paddle::experimental::Tensor>> RunCustomOpNode::
 operator()(
-    const std::vector<std::vector<paddle::experimental::Tensor>>& grads) {
+    const std::vector<std::vector<paddle::experimental::Tensor>>& grads,
+    bool create_graph) {
   paddle::CustomOpKernelContext ctx;
   auto grad_inputs_name = paddle::framework::OpMetaInfoHelper::GetInputs(
       egr::Controller::Instance().GetOpMetaInfoMap().at(op_type_)[1]);
paddle/fluid/eager/custom_operator/custom_operator_node.h

@@ -37,8 +37,8 @@ class RunCustomOpNode : public GradNodeBase {
   // Functor: perform backward computations
   virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
-      const std::vector<std::vector<paddle::experimental::Tensor>>& grads)
-      override;
+      const std::vector<std::vector<paddle::experimental::Tensor>>& grads,
+      bool create_graph) override;

   std::string name() {
     return paddle::string::Sprintf("RunCustomOpNode: %s_grad", op_type_);
@@ -62,6 +62,12 @@ class RunCustomOpNode : public GradNodeBase {
     return res;
   }

+  void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; }
+
+  bool IsTensorWrappersCleared() override {
+    VLOG(6) << "Do nothing here now";
+    return false;
+  }

   void SetAttrs(const std::vector<paddle::any>& attr) { attrs_ = attr; }

  public:
paddle/fluid/eager/grad_node_info.h

@@ -95,8 +95,12 @@ class GradNodeBase {
    * is better choice to fit this format.
    * **/
   virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
-      const std::vector<std::vector<paddle::experimental::Tensor>>& grads) = 0;
+      const std::vector<std::vector<paddle::experimental::Tensor>>& grads,
+      bool create_graph = false) = 0;
+
+  virtual void ClearTensorWrappers() = 0;
+
+  virtual bool IsTensorWrappersCleared() = 0;

   /**
    * AddEdges is designed to set input tensors' backward Node as current
    * node's Edges.
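To show the widened contract a concrete node now has to satisfy, a minimal hand-written sketch; GradNodeIdentity and its members are illustrative names only, since real nodes get the equivalent code from eager_generator.cc / eager_gen.py above:

class GradNodeIdentity : public egr::GradNodeBase {
 public:
  std::vector<std::vector<paddle::experimental::Tensor>> operator()(
      const std::vector<std::vector<paddle::experimental::Tensor>>& grads,
      bool create_graph = false) override {
    // create_graph=true means this backward run must itself be recorded so
    // that higher-order gradients can be taken later.
    return grads;  // identity backward, purely for illustration
  }

  // Called by the engine when retain_graph is false, so forward activations
  // held in TensorWrappers can be released after the first backward pass.
  void ClearTensorWrappers() override {
    x_wrapper_.clear();
    is_cleared_ = true;
  }

  bool IsTensorWrappersCleared() override { return is_cleared_; }

 private:
  egr::TensorWrapper x_wrapper_;
  bool is_cleared_ = false;
};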
paddle/fluid/eager/grad_tensor_holder.cc

@@ -21,6 +21,11 @@
 namespace egr {

+void GradTensorHolder::SetBufferSlotRankZeros(size_t slot_id, size_t rank) {
+  buffer_[slot_id][rank] =
+      paddle::experimental::zeros_like(buffer_[slot_id][rank]);
+}
+
 void GradTensorHolder::add(size_t slot_id, size_t rank,
                            const paddle::experimental::Tensor& t,
                            bool fill_one) {
paddle/fluid/eager/grad_tensor_holder.h
@@ -56,6 +56,8 @@ class GradTensorHolder {
     return buffer_;
   }

+  void SetBufferSlotRankZeros(size_t slot_id, size_t rank);
+
  private:
   std::vector<std::vector<paddle::experimental::Tensor>> buffer_;
 };
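SetBufferSlotRankZeros replaces one accumulated gradient entry with a same-shaped zero tensor, so a (slot, rank) position can be handed to a node and then reset without carrying stale values. A standalone mirror of what the implementation above does (a sketch, not the repository code):

// Illustrative free function mirroring GradTensorHolder::SetBufferSlotRankZeros.
void ResetSlot(std::vector<std::vector<paddle::experimental::Tensor>>* buffer,
               size_t slot_id, size_t rank) {
  (*buffer)[slot_id][rank] =
      paddle::experimental::zeros_like((*buffer)[slot_id][rank]);
}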
paddle/fluid/eager/tensor_wrapper.h
@@ -98,6 +98,8 @@ class TensorWrapper {
     }
   }

+  void clear() { intermidiate_tensor_.reset(); }
+
  private:
   bool full_reserved_ = false;
   std::pair<size_t, size_t> out_rank_info_;
paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc
@@ -17,6 +17,14 @@
 #include "paddle/fluid/eager/eager_tensor.h"
 #include "paddle/phi/api/lib/utils/allocator.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+PD_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(copy_sr, CPU, ALL_LAYOUT);
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+PD_DECLARE_KERNEL(copy, GPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(copy_sr, GPU, ALL_LAYOUT);
+#endif

 namespace eager_test {
 using AbstractAutogradMeta = paddle::experimental::AbstractAutogradMeta;
...
@@ -151,5 +159,50 @@ TEST(EagerVariable, Constructor) {
   CHECK_EQ(dt3_tmp_ptr[1], 10.0f);
   t4.reset();
   CHECK(t4.defined() == false);
+
+  VLOG(6) << "Check Tensor Copy_";
+  std::vector<int64_t> rows = {1, 2};
+  std::vector<int64_t> dims = {2};
+  paddle::experimental::Tensor t7(std::make_shared<phi::SelectedRows>(rows, 2));
+  std::dynamic_pointer_cast<phi::SelectedRows>(t7.impl())
+      ->mutable_value()
+      ->Resize(phi::make_ddim(dims));
+  auto* dt7_tmp_ptr = std::dynamic_pointer_cast<phi::SelectedRows>(t7.impl())
+                          ->mutable_value()
+                          ->mutable_data<float>(paddle::platform::CPUPlace());
+  dt7_tmp_ptr[0] = 6.0f;
+  dt7_tmp_ptr[1] = 11.0f;
+
+  paddle::experimental::Tensor t8;
+  paddle::experimental::Tensor t5;
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  paddle::experimental::Tensor t6;
+  paddle::experimental::Tensor t9;
+  VLOG(6) << "Check Tensor Copy_ Selected Rows";
+  t8.copy_(t7, paddle::platform::CUDAPlace(0), true);
+  t9.copy_(t8, paddle::platform::CPUPlace(), true);
+  auto* dt9_tmp_ptr =
+      std::dynamic_pointer_cast<phi::SelectedRows>(t9.impl())->value().data<float>();
+  CHECK_EQ(dt9_tmp_ptr[0], 6.0f);
+  CHECK_EQ(dt9_tmp_ptr[1], 11.0f);
+  CHECK_EQ(std::dynamic_pointer_cast<phi::SelectedRows>(t9.impl())->height(), 2);
+
+  VLOG(6) << "Check Tensor Copy_ Dense Tensor";
+  t5.copy_(t3, paddle::platform::CUDAPlace(0), true);
+  t6.copy_(t5, paddle::platform::CPUPlace(), true);
+  auto* dt6_tmp_ptr =
+      std::dynamic_pointer_cast<phi::DenseTensor>(t6.impl())->data<float>();
+  CHECK_EQ(dt6_tmp_ptr[0], 5.0f);
+  CHECK_EQ(dt6_tmp_ptr[1], 10.0f);
+#else
+  t5.copy_(t3, paddle::platform::CPUPlace(), true);
+  auto* dt5_tmp_ptr =
+      std::dynamic_pointer_cast<phi::DenseTensor>(t5.impl())->data<float>();
+  CHECK_EQ(dt5_tmp_ptr[0], 5.0f);
+  CHECK_EQ(dt5_tmp_ptr[1], 10.0f);
+#endif
+
   VLOG(6) << "Finish";
 }
paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h
@@ -32,8 +32,8 @@ class GradTestNode : public egr::GradNodeBase {
   GradTestNode() : GradNodeBase() { val_ = 1.0; }
   std::string name() override { return "GradTestNode"; }
   std::vector<std::vector<paddle::experimental::Tensor>> operator()(
-      const std::vector<std::vector<paddle::experimental::Tensor>>& grads)
-      override {
+      const std::vector<std::vector<paddle::experimental::Tensor>>& grads,
+      bool create_graph = false) override {
     val_ = std::dynamic_pointer_cast<phi::DenseTensor>(grads[0][0].impl())
                ->data<float>()[0];
     phi::DenseTensorMeta meta =
...
@@ -49,6 +49,11 @@ class GradTestNode : public egr::GradNodeBase {
     std::vector<std::vector<paddle::experimental::Tensor>> res = {{et1}};
     return res;
   }
+  void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; }
+  bool IsTensorWrappersCleared() override {
+    VLOG(6) << "Do nothing here now";
+    return false;
+  }
   float val_;
 };
 }  // namespace eager_test
paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc
@@ -58,7 +58,7 @@ void benchmark_eager_scale(const paddle::experimental::Tensor& tensor,
   }

   std::vector<paddle::experimental::Tensor> target_tensors = {input_tensor};
-  RunBackward(target_tensors, {});
+  Backward(target_tensors, {});

   if (accuracy_check) {
     // Examine Forward Grad (w.r.t max_num_runs = 10)
...
@@ -80,7 +80,7 @@ void benchmark_eager_matmul(const paddle::experimental::Tensor& X,
   }

   std::vector<paddle::experimental::Tensor> target_tensors = {input_tensor0};
-  RunBackward(target_tensors, {});
+  Backward(target_tensors, {});

   if (accuracy_check) {
     // Examine Forward Grad (w.r.t max_num_runs = 2)
...
@@ -106,7 +106,7 @@ void benchmark_eager_intermediate_matmul(const paddle::experimental::Tensor& X,
   }

   std::vector<paddle::experimental::Tensor> target_tensors = {input_tensor0};
-  RunBackward(target_tensors, {});
+  Backward(target_tensors, {});

   if (accuracy_check) {
     // Examine Forward Grad (w.r.t max_num_runs = 2)
...
@@ -137,7 +137,7 @@ void benchmark_eager_intermediate_mlp(
       reduce_sum_dygraph_function(input0, {{"reduce_all", true}});

   std::vector<paddle::experimental::Tensor> target_tensors = {Out};
-  RunBackward(target_tensors, {});
+  Backward(target_tensors, {});

   if (accuracy_check) {
     std::unordered_map<std::string, float> result =
paddle/fluid/eager/tests/task_tests/CMakeLists.txt
@@ -5,6 +5,7 @@ cc_test(test_egr_task_backward SRCS backward_test.cc DEPS ${eager_deps} ${fluid_
 cc_test(test_egr_task_hook SRCS hook_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
 cc_test(test_egr_task_cross_batch SRCS cross_batch_accumulation_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
 cc_test(test_egr_task_fwd_bwd_joint SRCS fwd_bwd_joint_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
+cc_test(test_egr_task_grad SRCS grad_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)

 if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
   cc_test(test_egr_task_hook_intermidiate SRCS hook_test_intermidiate.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} dygraph_node)
paddle/fluid/eager/tests/task_tests/backward_test.cc
@@ -33,6 +33,7 @@
 #include "paddle/phi/core/kernel_registry.h"

 PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT);

 namespace egr {
...
@@ -79,7 +80,7 @@ TEST(Backward, SingleNodeEmptyGrad) {
   }
   std::vector<paddle::experimental::Tensor> outs = {target_tensor};
   // Run Backward
-  RunBackward(outs, {});
+  Backward(outs, {});

   // Check Output Value
   eager_test::CompareGradTensorWithValue<float>(leaf_tensor, 5.0);
...
@@ -138,7 +139,7 @@ TEST(Backward, SingleNodeCustomGrad) {
   }

   // Run Backward
-  RunBackward(target_tensors, grad_tensors);
+  Backward(target_tensors, grad_tensors);

   // Check Output Value
   eager_test::CompareGradTensorWithValue<float>(leaf_tensor, 50.0);
...
@@ -211,7 +212,7 @@ TEST(Backward, LinearNodes) {
   }

   // Use Empty Grad Tensor
-  RunBackward(target_tensors, {});
+  Backward(target_tensors, {});

   // Check Output Value
   eager_test::CompareGradTensorWithValue<float>(leaf_tensor, 50.0);
...
@@ -315,7 +316,7 @@ TEST(Backward, WithAccumulation) {
     node2_ptr->AddEdges(&res2, 0);
   }

-  RunBackward(target_tensors, grad_tensors);
+  Backward(target_tensors, grad_tensors);

   eager_test::CompareGradTensorWithValue<float>(leaf_tensor, 2500.0);
 }
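Across these tests the entry point RunBackward is renamed to egr::Backward. The call pattern stays the same, as in this short sketch taken from the tests above: gradients of the listed output tensors are accumulated into every reachable leaf, and an empty second argument means "start from a gradient of ones".

// Usage sketch mirroring the tests above.
std::vector<paddle::experimental::Tensor> outs = {out};
Backward(outs, /*grad_tensors=*/{});
eager_test::CompareGradTensorWithValue<float>(leaf_tensor, 5.0);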
paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc
@@ -71,12 +71,12 @@ TEST(CrossBatchAccumulation, SingleScaleNode) {
   std::vector<egr::AutogradMeta*> res = {meta};
   scale_node_ptr->AddEdges(&res, 0);

-  RunBackward(target_tensors, {});
+  Backward(target_tensors, {});

   eager_test::CompareGradTensorWithValue<float>(target_tensor, 1.0);
   eager_test::CompareGradTensorWithValue<float>(leaf_tensor, 5.0);

-  RunBackward(target_tensors, {});
+  Backward(target_tensors, {});

   eager_test::CompareGradTensorWithValue<float>(target_tensor, 1.0);
   eager_test::CompareGradTensorWithValue<float>(leaf_tensor, 10.0);
...
...
paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc
浏览文件 @
4be77e53
...
@@ -86,7 +86,7 @@ TEST(FwdBwdJoint, SingleNode) {
...
@@ -86,7 +86,7 @@ TEST(FwdBwdJoint, SingleNode) {
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out
};
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out
};
// 4. Run Backward
// 4. Run Backward
Run
Backward
(
outs
,
{});
Backward
(
outs
,
{});
VLOG
(
7
)
<<
"Target Grad is: "
VLOG
(
7
)
<<
"Target Grad is: "
<<
std
::
static_pointer_cast
<
phi
::
DenseTensor
>
(
<<
std
::
static_pointer_cast
<
phi
::
DenseTensor
>
(
...
@@ -137,7 +137,7 @@ TEST(FwdBwdJoint, LinearNodes) {
...
@@ -137,7 +137,7 @@ TEST(FwdBwdJoint, LinearNodes) {
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out1
};
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out1
};
// 4. Run Backward
// 4. Run Backward
Run
Backward
(
outs
,
{});
Backward
(
outs
,
{});
// Examine Backward Grad
// Examine Backward Grad
eager_test
::
CompareGradTensorWithValue
<
float
>
(
tensor
,
10.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
tensor
,
10.0
);
...
@@ -203,7 +203,7 @@ TEST(FwdBwdJoint, BranchedNodes) {
...
@@ -203,7 +203,7 @@ TEST(FwdBwdJoint, BranchedNodes) {
// 4. Run Backward
// 4. Run Backward
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out1
,
out2
};
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out1
,
out2
};
Run
Backward
(
outs
,
{});
Backward
(
outs
,
{});
// Examine Backward Grad
// Examine Backward Grad
eager_test
::
CompareGradTensorWithValue
<
float
>
(
tensor
,
30.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
tensor
,
30.0
);
...
@@ -260,7 +260,7 @@ TEST(FwdBwdJoint, GradientHook) {
...
@@ -260,7 +260,7 @@ TEST(FwdBwdJoint, GradientHook) {
// 4. Run Backward
// 4. Run Backward
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out1
,
out2
};
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out1
,
out2
};
Run
Backward
(
outs
,
{});
Backward
(
outs
,
{});
// Examine Backward Grad
// Examine Backward Grad
// leaf grad
// leaf grad
...
@@ -318,13 +318,13 @@ TEST(FwdBwdJoint, CrossBatchAccumulation) {
...
@@ -318,13 +318,13 @@ TEST(FwdBwdJoint, CrossBatchAccumulation) {
// 4. Run Backward
// 4. Run Backward
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out1
,
out2
};
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out1
,
out2
};
Run
Backward
(
outs
,
{});
Backward
(
outs
,
{});
// Examine Backward Grad
// Examine Backward Grad
eager_test
::
CompareGradTensorWithValue
<
float
>
(
tensor
,
30.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
tensor
,
30.0
);
// Cross Batch Accumulation
// Cross Batch Accumulation
Run
Backward
(
outs
,
{});
Backward
(
outs
,
{});
// Examine Backward Grad
// Examine Backward Grad
eager_test
::
CompareGradTensorWithValue
<
float
>
(
tensor
,
60.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
tensor
,
60.0
);
...
@@ -356,7 +356,7 @@ TEST(FwdBwdJoint, SingleNodeCUDA) {
...
@@ -356,7 +356,7 @@ TEST(FwdBwdJoint, SingleNodeCUDA) {
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out
};
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out
};
// 4. Run Backward
// 4. Run Backward
Run
Backward
(
outs
,
{});
Backward
(
outs
,
{});
// Examine Backward Grad
// Examine Backward Grad
eager_test
::
CompareGradTensorWithValue
<
float
>
(
tensor
,
2.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
tensor
,
2.0
);
...
@@ -412,7 +412,7 @@ TEST(FwdBwdJoint, BranchedNodesCUDA) {
...
@@ -412,7 +412,7 @@ TEST(FwdBwdJoint, BranchedNodesCUDA) {
// TODO(jiabin): fix this with add functor
// TODO(jiabin): fix this with add functor
// 4. Run Backward
// 4. Run Backward
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out1
,
out2
};
std
::
vector
<
paddle
::
experimental
::
Tensor
>
outs
=
{
out1
,
out2
};
Run
Backward
(
outs
,
{});
Backward
(
outs
,
{});
// Examine Backward Grad
// Examine Backward Grad
eager_test
::
CompareGradTensorWithValue
<
float
>
(
tensor
,
30.0
);
eager_test
::
CompareGradTensorWithValue
<
float
>
(
tensor
,
30.0
);
paddle/fluid/eager/tests/task_tests/generated_test.cc
@@ -57,7 +57,7 @@ TEST(Generated, Sigmoid) {
   std::vector<paddle::experimental::Tensor> target_tensors = {output_tensor};

   VLOG(6) << "Runing Backward";
-  RunBackward(target_tensors, {});
+  Backward(target_tensors, {});

   VLOG(6) << "Finish Backward";
   eager_test::CompareGradTensorWithValue<float>(tensor, 0.25);
...
@@ -89,7 +89,7 @@ TEST(Generated, Matmul_v2) {
   eager_test::CompareTensorWithValue<float>(output_tensor, 96);

   std::vector<paddle::experimental::Tensor> target_tensors = {output_tensor};
-  RunBackward(target_tensors, {});
+  Backward(target_tensors, {});

   eager_test::CompareGradTensorWithValue<float>(X, 2.0 * 20);
   eager_test::CompareGradTensorWithValue<float>(Y, 3.0 * 4);
...
@@ -120,7 +120,7 @@ TEST(Generated, ElementwiseAdd) {
   eager_test::CompareTensorWithValue<float>(output_tensor, 5);

   std::vector<paddle::experimental::Tensor> target_tensors = {output_tensor};
-  RunBackward(target_tensors, {});
+  Backward(target_tensors, {});

   eager_test::CompareGradTensorWithValue<float>(X, 1.0);
   eager_test::CompareGradTensorWithValue<float>(Y, 1.0);
paddle/fluid/eager/tests/task_tests/grad_test.cc
0 → 100644
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sstream>
#include "glog/logging.h"
#include "gtest/gtest.h"
#include "paddle/fluid/eager/accumulation/accumulation_node.h"
#include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h"
#include "paddle/fluid/eager/api/utils/tensor_utils.h"
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/backward.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/tests/test_utils.h"
#include "paddle/fluid/eager/api/all.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_meta.h"
PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT);

namespace egr {

TEST(Grad, SingleNodeEmptyGrad) {
  // Prepare Device Contexts
  eager_test::InitEnv(paddle::platform::CPUPlace());

  // Prepare Inputs
  paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32});

  // Create Target Tensor (output)
  paddle::experimental::Tensor output_tensor = egr_utils_api::CreateTensorWithValue(
      ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
      phi::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/);

  // Create input tensor
  const paddle::experimental::Tensor leaf_tensor = egr_utils_api::CreateTensorWithValue(
      ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
      phi::DataLayout::NCHW, 1.0 /*value*/, true /*is_leaf*/);

  {
    // Create Scale Node
    auto node0_ptr = std::make_shared<GradNodeScale>(1, 1);
    node0_ptr->SetAttributes_scale(5.0 /*scale*/);

    // Set grad in/out meta
    node0_ptr->SetDefaultGradInOutMeta();

    // Output_tensor set GradNode, OutRank, StopGradient properties
    AutogradMeta* auto_grad_meta = EagerUtils::autograd_meta(&output_tensor);
    auto_grad_meta->SetGradNode(std::dynamic_pointer_cast<GradNodeBase>(node0_ptr));
    auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
    auto_grad_meta->SetStopGradient(false);

    // Get autograd_meta from input tensor
    AutogradMeta* auto_grad_meta1 = EagerUtils::unsafe_autograd_meta(leaf_tensor);

    // Connect Tensor and AccumulationNode via AutoGradMeta
    auto acc_node_ptr = std::make_shared<egr::GradNodeAccumulation>(auto_grad_meta1);

    // Input tensor set GradNode, OutRank, StopGradient properties
    auto_grad_meta1->SetGradNode(std::dynamic_pointer_cast<GradNodeBase>(acc_node_ptr));
    auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);
    auto_grad_meta1->SetStopGradient(false);

    // grad_node Add Edges
    std::vector<egr::AutogradMeta*> res = {auto_grad_meta1};
    node0_ptr->AddEdges(&res, 0);
  }
  std::vector<paddle::experimental::Tensor> outs = {output_tensor};

  // Run Grad
  auto result = Grad(outs, {leaf_tensor}, {});

  // Check Output Value
  eager_test::CompareTensorWithValue<float>(result[0], 5.0);
}

TEST(Grad, SingleNodeCustomGrad) {
  // Prepare Device Contexts
  eager_test::InitEnv(paddle::platform::CPUPlace());

  // Prepare Inputs
  std::vector<paddle::experimental::Tensor> target_tensors;
  paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32});

  // Create Target Tensor
  paddle::experimental::Tensor tensor = egr_utils_api::CreateTensorWithValue(
      ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
      phi::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/);
  target_tensors.emplace_back(std::move(tensor));

  std::vector<paddle::experimental::Tensor> grad_tensors;
  // Create Grad Tensor
  paddle::experimental::Tensor grad_tensor = egr_utils_api::CreateTensorWithValue(
      ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
      phi::DataLayout::NCHW, 10.0 /*value*/, false /*is_leaf*/);
  grad_tensors.emplace_back(std::move(grad_tensor));

  paddle::experimental::Tensor leaf_tensor = egr_utils_api::CreateTensorWithValue(
      ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
      phi::DataLayout::NCHW, 1.0 /*value*/, true /*is_leaf*/);

  {
    // Create Scale Node
    auto node0_ptr = std::make_shared<GradNodeScale>(1, 1);
    node0_ptr->SetAttributes_scale(5.0 /*scale*/);

    // Set grad in/out meta
    node0_ptr->SetDefaultGradInOutMeta();

    // Connect Tensor and Node via AutoGradMeta
    AutogradMeta* auto_grad_meta = EagerUtils::autograd_meta(&(target_tensors[0]));
    auto_grad_meta->SetGradNode(std::dynamic_pointer_cast<GradNodeBase>(node0_ptr));
    auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
    auto_grad_meta->SetStopGradient(false);

    AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor);
    // Connect Tensor and AccumulationNode via AutoGradMeta
    auto acc_node_ptr = std::make_shared<egr::GradNodeAccumulation>(auto_grad_meta1);

    auto_grad_meta1->SetGradNode(std::dynamic_pointer_cast<GradNodeBase>(acc_node_ptr));
    auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);
    auto_grad_meta1->SetStopGradient(false);

    std::vector<egr::AutogradMeta*> res = {auto_grad_meta1};
    node0_ptr->AddEdges(&res, 0);
  }

  auto result = Grad(target_tensors, {leaf_tensor}, grad_tensors);

  // Check Output Value
  eager_test::CompareTensorWithValue<float>(result[0], 50.0);
}

/*
Node1
  |
Node0
  |
 { } // empty grad tensor
*/
TEST(Grad, LinearNodes) {
  // Prepare Device Contexts
  eager_test::InitEnv(paddle::platform::CPUPlace());

  // Prepare Target Tensor
  std::vector<paddle::experimental::Tensor> target_tensors;
  paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32});

  // Create Target Tensor
  paddle::experimental::Tensor tensor = egr_utils_api::CreateTensorWithValue(
      ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
      phi::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/);
  target_tensors.emplace_back(std::move(tensor));

  paddle::experimental::Tensor leaf_tensor = egr_utils_api::CreateTensorWithValue(
      ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
      phi::DataLayout::NCHW, 1.0 /*value*/, true /*is_leaf*/);

  {
    // Create Node0
    auto node0_ptr = std::make_shared<GradNodeScale>(1, 1);
    node0_ptr->SetAttributes_scale(5.0 /*scale*/);

    // Set grad in/out meta for node0
    node0_ptr->SetDefaultGradInOutMeta();

    // Create Node1
    auto node1_ptr = std::make_shared<GradNodeScale>(1, 1);
    node1_ptr->SetAttributes_scale(10.0 /*scale*/);

    // Set grad in/out meta for node1
    node1_ptr->SetDefaultGradInOutMeta();

    // Connect Input Tensor and Node0 via AutoGradMeta
    AutogradMeta* auto_grad_meta = EagerUtils::autograd_meta(&(target_tensors[0]));
    auto_grad_meta->SetGradNode(std::dynamic_pointer_cast<GradNodeBase>(node0_ptr));
    auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
    auto_grad_meta->SetStopGradient(false);

    // Connect Node0 -> Node1 via Edge
    auto meta0 = egr::AutogradMeta();
    meta0.SetStopGradient(false);
    meta0.SetSingleOutRankWithSlot(0, 0);
    meta0.SetGradNode(node1_ptr);
    std::vector<egr::AutogradMeta*> res0 = {&meta0};
    node0_ptr->AddEdges(&res0, 0);

    AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor);
    // Connect Tensor and AccumulationNode via AutoGradMeta
    auto acc_node_ptr = std::make_shared<egr::GradNodeAccumulation>(auto_grad_meta1);

    auto_grad_meta1->SetGradNode(std::dynamic_pointer_cast<GradNodeBase>(acc_node_ptr));
    auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);

    auto_grad_meta1->SetStopGradient(false);
    std::vector<egr::AutogradMeta*> res1 = {auto_grad_meta1};
    node1_ptr->AddEdges(&res1, 0);
  }

  // Use Empty Grad Tensor
  auto result = Grad(target_tensors, {leaf_tensor}, {});

  // Check Output Value
  eager_test::CompareTensorWithValue<float>(result[0], 50.0);
}

/*
    Node2
    |   |
Node0   Node1
  |      |
 in0    in1
*/
TEST(Grad, WithAccumulation) {
  // Prepare Device Contexts
  eager_test::InitEnv(paddle::platform::CPUPlace());

  // Prepare Inputs
  paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32});

  // Create Target Tensor
  std::vector<paddle::experimental::Tensor> target_tensors;
  paddle::experimental::Tensor tensor0 = egr_utils_api::CreateTensorWithValue(
      ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
      phi::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/);
  paddle::experimental::Tensor tensor1 = egr_utils_api::CreateTensorWithValue(
      ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
      phi::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/);
  target_tensors.emplace_back(std::move(tensor0));
  target_tensors.emplace_back(std::move(tensor1));

  // Create Grad Tensor
  std::vector<paddle::experimental::Tensor> grad_tensors;
  paddle::experimental::Tensor grad_tensor0 = egr_utils_api::CreateTensorWithValue(
      ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
      phi::DataLayout::NCHW, 5.0 /*value*/, false /*is_leaf*/);
  paddle::experimental::Tensor grad_tensor1 = egr_utils_api::CreateTensorWithValue(
      ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
      phi::DataLayout::NCHW, 10.0 /*value*/, false /*is_leaf*/);
  grad_tensors.emplace_back(std::move(grad_tensor0));
  grad_tensors.emplace_back(std::move(grad_tensor1));

  paddle::experimental::Tensor leaf_tensor;
  {
    // Create Node0
    auto node0_ptr = std::make_shared<GradNodeScale>(1, 1);
    node0_ptr->SetAttributes_scale(5.0 /*scale*/);
    node0_ptr->SetDefaultGradInOutMeta();

    // Create Node1
    auto node1_ptr = std::make_shared<GradNodeScale>(1, 1);
    node1_ptr->SetAttributes_scale(10.0 /*scale*/);
    node1_ptr->SetDefaultGradInOutMeta();

    // Create Node2
    auto node2_ptr = std::make_shared<GradNodeScale>(1, 1);
    node2_ptr->SetAttributes_scale(20.0 /*scale*/);
    node2_ptr->SetDefaultGradInOutMeta();

    // Connect Inp0 and Node0 via AutoGradMeta
    AutogradMeta* auto_grad_meta0 = EagerUtils::autograd_meta(&(target_tensors[0]));
    auto_grad_meta0->SetGradNode(std::dynamic_pointer_cast<GradNodeBase>(node0_ptr));
    auto_grad_meta0->SetSingleOutRankWithSlot(0, 0);
    auto_grad_meta0->SetStopGradient(false);

    // Connect Inp1 and Node1 via AutoGradMeta
    AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&(target_tensors[1]));
    auto_grad_meta1->SetGradNode(std::dynamic_pointer_cast<GradNodeBase>(node1_ptr));
    auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);
    auto_grad_meta1->SetStopGradient(false);

    // Connect Node0 -> Node2 via Edge
    auto meta0 = egr::AutogradMeta();
    meta0.SetStopGradient(false);
    meta0.SetSingleOutRankWithSlot(0, 0);
    meta0.SetGradNode(node2_ptr);
    std::vector<egr::AutogradMeta*> res0 = {&meta0};
    node0_ptr->AddEdges(&res0, 0);

    // Connect Node1 -> Node2 via Edge
    auto meta1 = egr::AutogradMeta();
    meta1.SetStopGradient(false);
    meta1.SetSingleOutRankWithSlot(0, 0);
    meta1.SetGradNode(node2_ptr);
    std::vector<egr::AutogradMeta*> res1 = {&meta1};
    node1_ptr->AddEdges(&res1, 0);

    AutogradMeta* auto_grad_meta2 = EagerUtils::autograd_meta(&leaf_tensor);
    // Connect Tensor and AccumulationNode via AutoGradMeta
    auto acc_node_ptr = std::make_shared<egr::GradNodeAccumulation>(auto_grad_meta2);

    auto_grad_meta2->SetGradNode(std::dynamic_pointer_cast<GradNodeBase>(acc_node_ptr));
    auto_grad_meta2->SetSingleOutRankWithSlot(0, 0);
    auto_grad_meta2->SetStopGradient(false);
    std::vector<egr::AutogradMeta*> res2 = {auto_grad_meta2};
    node2_ptr->AddEdges(&res2, 0);
  }

  auto result = Grad(target_tensors, {leaf_tensor}, grad_tensors);
  eager_test::CompareTensorWithValue<float>(result[0], 2500.0);
}

}  // namespace egr
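Unlike Backward, which writes accumulated gradients into each leaf tensor's grad slot, the new Grad entry point returns the gradients of the requested inputs directly. A short usage sketch following TEST(Grad, SingleNodeCustomGrad) above:

// Gradients flow from target_tensors back to leaf_tensor; the result vector
// holds one gradient tensor per requested input, in order.
auto result = Grad(target_tensors, /*inputs=*/{leaf_tensor},
                   /*grad_tensors=*/grad_tensors);
eager_test::CompareTensorWithValue<float>(result[0], 50.0);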
paddle/fluid/eager/tests/task_tests/hook_test.cc
@@ -132,7 +132,7 @@ TEST(RetainGrad, HookBeforeRetainGrad) {
         leaf_tensor);  // result: 4.0*5.0 + 3.0 = 23.0
   }

-  RunBackward(target_tensors, {});
+  Backward(target_tensors, {});

   eager_test::CompareGradTensorWithValue<float>(target_tensor, 4.0);
   eager_test::CompareGradTensorWithValue<float>(leaf_tensor, 23.0);
...
@@ -199,7 +199,7 @@ TEST(RetainGrad, HookAfterRetainGrad) {
         leaf_tensor, std::make_shared<egr::CppTensorHook>(hook_function));
   }

-  RunBackward(target_tensors, {});
+  Backward(target_tensors, {});

   eager_test::CompareGradTensorWithValue<float>(target_tensor, 1.0);
   eager_test::CompareGradTensorWithValue<float>(leaf_tensor, 23.0);
 }
paddle/fluid/eager/tests/task_tests/hook_test_intermidiate.cc
@@ -108,7 +108,7 @@ void test_sigmoid(bool is_remove_gradient_hook) {
   }

   VLOG(6) << "Runing Backward";
-  RunBackward(target_tensors, {});
+  Backward(target_tensors, {});
   VLOG(6) << "Finish Backward";

   eager_test::CompareGradTensorWithValue<float>(
...
@@ -166,7 +166,7 @@ void test_elementwiseAdd(bool is_remove_gradient_hook) {
     grad_node_tmp->RemoveGradientHook(hook_id);
   }

-  RunBackward(target_tensors, {});
+  Backward(target_tensors, {});

   eager_test::CompareGradTensorWithValue<float>(X, 1.0);
   eager_test::CompareGradTensorWithValue<float>(
...
@@ -224,7 +224,7 @@ void test_matmul(bool is_remove_gradient_hook) {
     grad_node_tmp->RemoveGradientHook(hook_id);
   }

-  RunBackward(target_tensors, {});
+  Backward(target_tensors, {});

   eager_test::CompareGradTensorWithValue<float>(X, 2.0 * 20);
   eager_test::CompareGradTensorWithValue<float>(
...
...
paddle/fluid/eager/to_static/run_program_op_node.h
@@ -370,8 +370,8 @@ class GradNodeRunProgram : public egr::GradNodeBase {
   ~GradNodeRunProgram() override = default;
   // Functor: perform backward computations
   virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
-      const std::vector<std::vector<paddle::experimental::Tensor>> &grads)
-      override {
+      const std::vector<std::vector<paddle::experimental::Tensor>> &grads,
+      bool create_graph) override {
     VLOG(3) << "Running Eager Backward Node: GradNodeRunProgram";
     PADDLE_ENFORCE_EQ(
         grads.size(), 1,
...
@@ -415,6 +415,12 @@ class GradNodeRunProgram : public egr::GradNodeBase {
     // return {x_grad, details::DereferenceTensors(params_grad_ptr)};
   }

+  void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; }
+
+  bool IsTensorWrappersCleared() override {
+    VLOG(6) << "Do nothing here now";
+    return false;
+  }
+
   // SetAttrMap
   void SetAttrMap(const paddle::framework::AttributeMap &attrs) {
     attrs_ = attrs;
...
...
paddle/fluid/framework/ir/CMakeLists.txt
@@ -97,6 +97,7 @@ pass_library(layer_norm_fuse_pass inference)
 pass_library(add_support_int8_pass inference)
 pass_library(matmul_scale_fuse_pass inference)
 pass_library(gpu_cpu_map_matmul_to_mul_pass inference)
+pass_library(mixed_precision_configure_pass inference)
 pass_library(generate_pass DEPS pass_desc_proto)
 target_link_libraries(generate_pass pass_desc_proto)
paddle/fluid/framework/ir/mixed_precision_configure_pass.cc
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/mixed_precision_configure_pass.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/op_version_registry.h"
namespace paddle {
namespace framework {
namespace ir {

void MixedPrecisionConfigurePass::InsertCastOps(
    Graph* graph, const StringSet& blacklist) const {
  VLOG(3) << "Insert the cast op before and after the kernel that does not "
             "supports fp16 precision";

  auto update_cast_desc = [&](framework::OpDesc& desc,
                              const std::string& x_name,
                              const std::string& out_name, const int in_dtype,
                              const int out_dtype) {
    desc.SetType("cast");
    desc.SetInput("X", {x_name});
    desc.SetOutput("Out", {out_name});
    desc.SetAttr("in_dtype", in_dtype);
    desc.SetAttr("out_dtype", out_dtype);
    desc.SetAttr("use_mkldnn", false);
    desc.SetAttr("with_quant_attr", false);
    desc.Flush();
  };

  auto cast_input = [&](Graph* graph, Node* op_node,
                        const StringSet& cast_list) {
    auto inlinks = op_node->inputs;
    for (auto* pre_node : inlinks) {
      if (pre_node->IsVar()) {
        const auto is_persistable = pre_node->Var()->Persistable();
        const auto is_float =
            pre_node->Var()->GetDataType() == proto::VarType::FP16 ||
            pre_node->Var()->GetDataType() == proto::VarType::FP32 ||
            pre_node->Var()->GetDataType() == proto::VarType::FP64;
        if (!is_persistable && is_float) {
          int suffix = 0;
          for (auto* pre_node_input : pre_node->inputs) {
            if (!pre_node_input->IsOp()) continue;
            const auto& type = pre_node_input->Op()->Type();
            if (!cast_list.count(type) && type != "cast") {
              std::string old_name = pre_node->Name();
              std::string new_name =
                  old_name + "_cast.tmp_" + std::to_string(suffix);
              suffix++;

              framework::OpDesc new_op_desc(op_node->Op()->Block());
              // 4 for fp16, 5 for fp32
              update_cast_desc(new_op_desc, old_name, new_name, 4, 5);
              auto* new_op = graph->CreateOpNode(&new_op_desc);

              VarDesc out_var(new_name);
              out_var.SetPersistable(false);
              auto* node_var = graph->CreateVarNode(&out_var);

              op_node->Op()->RenameInput(old_name, new_name);
              IR_NODE_LINK_TO(pre_node, new_op);
              IR_NODE_LINK_TO(new_op, node_var);
              IR_NODE_LINK_TO(node_var, op_node);
            }
          }
        }
      }
    }
  };

  auto cast_output = [&](Graph* graph, Node* op_node,
                         const StringSet& cast_list) {
    auto outlinks = op_node->outputs;
    for (auto* next_node : outlinks) {
      if (next_node->IsVar()) {
        const auto is_persistable = next_node->Var()->Persistable();
        const auto is_float =
            next_node->Var()->GetDataType() == proto::VarType::FP16 ||
            next_node->Var()->GetDataType() == proto::VarType::FP32 ||
            next_node->Var()->GetDataType() == proto::VarType::FP64;
        if (!is_persistable && is_float) {
          int suffix = 0;
          for (auto* next_node_output : next_node->outputs) {
            if (!next_node_output->IsOp()) continue;
            const auto& type = next_node_output->Op()->Type();
            if (!cast_list.count(type) && type != "cast") {
              std::string old_name = next_node->Name();
              std::string new_name =
                  old_name + "_cast.tmp_" + std::to_string(suffix);
              suffix++;

              framework::OpDesc new_op_desc(op_node->Op()->Block());
              // 4 for fp16, 5 for fp32
              update_cast_desc(new_op_desc, old_name, new_name, 5, 4);
              auto* new_op = graph->CreateOpNode(&new_op_desc);

              VarDesc out_var(new_name);
              out_var.SetPersistable(false);
              auto* node_var = graph->CreateVarNode(&out_var);

              next_node_output->Op()->RenameInput(old_name, new_name);
              IR_NODE_LINK_TO(next_node, new_op);
              IR_NODE_LINK_TO(new_op, node_var);
              IR_NODE_LINK_TO(node_var, next_node_output);
            }
          }
        }
      }
    }
  };

  for (auto* op_node :
       ir::TopologyVarientSort(*graph, static_cast<ir::SortKind>(0))) {
    if (!op_node->IsOp() || op_node->Op()->Type() == "feed" ||
        op_node->Op()->Type() == "fetch")
      continue;
    const auto& type = op_node->Op()->Type();
    if (blacklist.count(type)) {
      cast_input(graph, op_node, blacklist);
      cast_output(graph, op_node, blacklist);
    }
  }
}

void MixedPrecisionConfigurePass::ApplyImpl(Graph* graph) const {
  const auto blacklist =
      Get<std::unordered_set<std::string>>("gpu_fp16_disabled_op_types");
  InsertCastOps(graph, blacklist);
}

}  // namespace ir
}  // namespace framework
}  // namespace paddle

REGISTER_PASS(mixed_precision_configure_pass,
              paddle::framework::ir::MixedPrecisionConfigurePass);
paddle/fluid/framework/ir/mixed_precision_configure_pass.h
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
namespace paddle {
namespace framework {
namespace ir {

using StringSet = std::unordered_set<std::string>;

class MixedPrecisionConfigurePass : public FusePassBase {
 public:
  MixedPrecisionConfigurePass() = default;
  virtual ~MixedPrecisionConfigurePass() {}

 protected:
  void ApplyImpl(Graph* graph) const override;

 private:
  void InsertCastOps(Graph* graph, const StringSet& blacklist) const;
};

}  // namespace ir
}  // namespace framework
}  // namespace paddle
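The pass reads its blacklist from the "gpu_fp16_disabled_op_types" attribute, which the IR pass manager wires up below. A hedged sketch of applying it directly to a graph (the graph setup and the chosen op names are illustrative, not from this diff):

// Sketch only: fetch the registered pass, hand it a blacklist, and apply it.
auto pass = paddle::framework::ir::PassRegistry::Instance().Get(
    "mixed_precision_configure_pass");
pass->Set("gpu_fp16_disabled_op_types",
          new std::unordered_set<std::string>({"roll", "strided_slice"}));
pass->Apply(graph.get());  // inserts cast ops around blacklisted (fp32) kernels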
paddle/fluid/inference/analysis/argument.h
@@ -188,6 +188,9 @@ struct Argument {
   DECL_ARGUMENT_FIELD(use_gpu, UseGPU, bool);
   DECL_ARGUMENT_FIELD(use_fc_padding, UseFcPadding, bool);
   DECL_ARGUMENT_FIELD(gpu_device_id, GPUDeviceId, int);
+  DECL_ARGUMENT_FIELD(use_gpu_fp16, UseGPUFp16, bool);
+  DECL_ARGUMENT_FIELD(gpu_fp16_disabled_op_types, GpuFp16DisabledOpTypes,
+                      std::unordered_set<std::string>);

   // Usually use for trt dynamic shape.
   // TRT will select the best kernel according to opt shape
paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -189,6 +189,10 @@ void IRPassManager::CreatePasses(Argument *argument,
                 new int(argument->dlnne_min_subgraph_size()));
       pass->Set("program",
                 new framework::ProgramDesc *(&argument->main_program()));
+    } else if (pass_name == "mixed_precision_configure_pass") {
+      pass->Set("gpu_fp16_disabled_op_types",
+                new std::unordered_set<std::string>(
+                    argument->gpu_fp16_disabled_op_types()));
     }
     if (pass_name == "lite_subgraph_pass") {
       bool lite_enable_int8 =
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
@@ -14,6 +14,7 @@
 #include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h"
 #include "paddle/fluid/framework/data_layout.h"
+#include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/platform/enforce.h"
...
@@ -65,6 +66,26 @@ void IrParamsSyncAmongDevicesPass::CopyParamsToNpu(Argument *argument) {
 #else

+void IrParamsSyncAmongDevicesPass::GetVarNameToOpTypeMap(
+    const framework::ir::Graph &graph,
+    std::unordered_map<std::string, std::string> *var_name_op_type_map) {
+  std::vector<framework::ir::Node *> node_list =
+      framework::ir::TopologyVarientSort(
+          graph, static_cast<framework::ir::SortKind>(0));
+  for (auto *op_node : node_list) {
+    if (!op_node->IsOp() || op_node->Op()->Type() == "feed" ||
+        op_node->Op()->Type() == "fetch")
+      continue;
+
+    for (auto *pre_node : op_node->inputs) {
+      if (pre_node->IsVar() && pre_node->Var()->Persistable()) {
+        var_name_op_type_map->insert(std::pair<std::string, std::string>(
+            pre_node->Var()->Name(), op_node->Op()->Type()));
+      }
+    }
+  }
+}
+
 void IrParamsSyncAmongDevicesPass::CopyParamsToGpu(Argument *argument) {
   // The parameters are on the cpu, therefore, synchronization is not necessary.
   if (!argument->use_gpu()) return;
...
@@ -102,6 +123,16 @@ void IrParamsSyncAmongDevicesPass::CopyParamsToGpu(Argument *argument) {
   if (with_dynamic_shape) {
     reserve_cpu_weights = true;
   }

+  bool mixed_precision_mode =
+      argument->Has("use_gpu_fp16") && argument->use_gpu_fp16();
+  std::unordered_map<std::string, std::string> var_name_op_type_map{};
+  std::unordered_set<std::string> blacklist{};
+  if (mixed_precision_mode) {
+    GetVarNameToOpTypeMap(graph, &var_name_op_type_map);
+    blacklist = argument->gpu_fp16_disabled_op_types();
+  }
+
   for (auto &var_name : all_vars) {
     if (std::count(repetitive_params.begin(), repetitive_params.end(),
                    var_name)) {
...
@@ -117,18 +148,29 @@ void IrParamsSyncAmongDevicesPass::CopyParamsToGpu(Argument *argument) {
The body of the loop is rewritten as follows (resulting code after this change):

         var->IsType<framework::Tensor>()) {
       auto *t = var->GetMutable<framework::LoDTensor>();

       bool is_float = t->dtype() == paddle::experimental::DataType::FLOAT32 ||
                       t->dtype() == paddle::experimental::DataType::FLOAT64;
       if (mixed_precision_mode &&
           !blacklist.count(var_name_op_type_map[var_name]) && is_float) {
         framework::Tensor half_tensor;
         half_tensor.set_type(paddle::experimental::DataType::FLOAT16);
         half_tensor.Resize(t->dims());
         auto *half_data =
             half_tensor.mutable_data<float16>(platform::CPUPlace());
         for (int i = 0; i < t->numel(); i++) {
           auto *data = t->mutable_data<float>(platform::CPUPlace());
           half_data[i] = static_cast<float16>(data[i]);
         }
         t->clear();
         paddle::framework::TensorCopySync(half_tensor, place, t);
       } else {
         platform::CPUPlace cpu_place;
         framework::LoDTensor temp_tensor;
         temp_tensor.Resize(t->dims());
         // Copy the parameter data to a tmp tensor.
         paddle::framework::TensorCopySync(*t, cpu_place, &temp_tensor);
         // Reallocation the space on GPU
         t->clear();
         // Copy parameter data to newly allocated GPU space.
         paddle::framework::TensorCopySync(temp_tensor, place, t);
       }
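The heart of the mixed-precision branch above is an element-wise fp32 to fp16 narrowing performed on the CPU before the weights are copied to the GPU. A standalone sketch of that narrowing step (simplified, not the repository code; it only assumes paddle::platform::float16):

#include <vector>
#include "paddle/fluid/platform/float16.h"

// Convert an fp32 buffer to fp16 element by element; precision may be lost.
std::vector<paddle::platform::float16> ToHalf(const std::vector<float>& src) {
  std::vector<paddle::platform::float16> dst(src.size());
  for (size_t i = 0; i < src.size(); ++i) {
    dst[i] = static_cast<paddle::platform::float16>(src[i]);
  }
  return dst;
}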
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h
@@ -38,7 +38,12 @@ class IrParamsSyncAmongDevicesPass : public AnalysisPass {
 #ifdef PADDLE_WITH_ASCEND_CL
   void CopyParamsToNpu(Argument *argument);
 #else
-  void CopyParamsToGpu(Argument *argument);
+  void GetVarNameToOpTypeMap(
+      const framework::ir::Graph &graph,
+      std::unordered_map<std::string, std::string> *var_name_op_type_map);
+
+  void CopyParamsToGpu(Argument *argument);
 #endif
 };
paddle/fluid/inference/api/analysis_config.cc
@@ -83,6 +83,7 @@ void AnalysisConfig::SetModel(const std::string &prog_file_path,
   Update();
 }

 void AnalysisConfig::EnableUseGpu(uint64_t memory_pool_init_size_mb,
                                   int device_id) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
...
@@ -97,12 +98,26 @@ void AnalysisConfig::EnableUseGpu(uint64_t memory_pool_init_size_mb,
   Update();
 }

 void AnalysisConfig::DisableGpu() {
   use_gpu_ = false;
   Update();
 }

+void AnalysisConfig::Exp_EnableUseGpuFp16(
+    std::unordered_set<std::string> op_list) {
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  use_gpu_fp16_ = true;
+  gpu_fp16_disabled_op_types_.insert(op_list.begin(), op_list.end());
+#else
+  LOG(ERROR) << "Please compile with gpu to Exp_EnableUseGpuFp16()";
+  use_gpu_fp16_ = false;
+#endif
+  Update();
+}
+
 void AnalysisConfig::DisableFCPadding() {
   use_fc_padding_ = false;
...
@@ -213,6 +228,8 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(use_cudnn_);
   CP_MEMBER(gpu_device_id_);
   CP_MEMBER(memory_pool_init_size_mb_);
+  CP_MEMBER(use_gpu_fp16_);
+  CP_MEMBER(gpu_fp16_disabled_op_types_);

   CP_MEMBER(enable_memory_optim_);
   // TensorRT related.
...
@@ -573,6 +590,20 @@ void AnalysisConfig::Update() {
 #endif
   }

+  if (use_gpu_fp16_) {
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+    if (!enable_ir_optim_) {
+      LOG(ERROR) << "Exp_EnableUseGpuFp16() only works when IR optimization is "
+                    "enabled.";
+    } else if (!use_gpu()) {
+      LOG(ERROR) << "Exp_EnableUseGpuFp16() only works when use_gpu is enabled.";
+    } else {
+      pass_builder()->Exp_EnableUseGpuFp16();
+    }
+#endif
+  }
+
   if (use_mkldnn_) {
 #ifdef PADDLE_WITH_MKLDNN
     if (!enable_ir_optim_) {
...
@@ -669,6 +700,8 @@ std::string AnalysisConfig::SerializeInfoCache() {
   ss << params_file_;
   ss << use_gpu_;
+  ss << use_gpu_fp16_;
+  for (auto &item : gpu_fp16_disabled_op_types_) ss << item;
   ss << use_fc_padding_;
   ss << gpu_device_id_;
   ss << xpu_device_id_;
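From user code the new experimental GPU fp16 path is turned on through the config, mirroring the tests added in analysis_predictor_tester.cc further down. A hedged usage sketch (the model path and the disabled-op choice are hypothetical):

// Usage sketch: fp16 inference requires IR optimization and a GPU device.
paddle::AnalysisConfig config;
config.SetModel("/path/to/model_dir");        // hypothetical model location
config.SwitchIrOptim();                       // required by Exp_EnableUseGpuFp16()
config.EnableUseGpu(100 /*MB*/, 0 /*device*/);
config.Exp_EnableUseGpuFp16({"softmax"});     // optionally keep listed ops in fp32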
paddle/fluid/inference/api/analysis_predictor.cc
@@ -872,6 +872,11 @@ void AnalysisPredictor::PrepareArgument() {
     argument_.SetDlnneMinSubgraphSize(config_.dlnne_min_subgraph_size_);
   }

+  if (config_.gpu_fp16_enabled()) {
+    argument_.SetUseGPUFp16(true);
+    argument_.SetGpuFp16DisabledOpTypes(config_.gpu_fp16_disabled_op_types_);
+  }
+
   if (config_.lite_engine_enabled()) {
     argument_.SetCpuMathLibraryNumThreads(
         config_.cpu_math_library_num_threads());
paddle/fluid/inference/api/analysis_predictor_tester.cc
@@ -375,6 +375,19 @@ TEST(AnalysisPredictor, enable_onnxruntime) {
   ASSERT_TRUE(!config.use_onnxruntime());
 }

+TEST(AnalysisPredictor, exp_enable_use_gpu_fp16) {
+  AnalysisConfig config;
+  config.SwitchIrOptim();
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  config.EnableUseGpu(100, 0);
+  config.Exp_EnableUseGpuFp16();
+  ASSERT_TRUE(config.gpu_fp16_enabled());
+#else
+  config.DisableGpu();
+#endif
+  LOG(INFO) << config.Summary();
+}
+
 }  // namespace paddle

 namespace paddle_infer {
...
@@ -434,6 +447,19 @@ TEST(Predictor, EnableONNXRuntime) {
   auto predictor = CreatePredictor(config);
 }

+TEST(Predictor, Exp_EnableUseGpuFp16) {
+  Config config;
+  config.SetModel(FLAGS_dirname);
+  config.SwitchIrOptim();
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  config.EnableUseGpu(100, 0);
+  config.Exp_EnableUseGpuFp16();
+#else
+  config.DisableGpu();
+#endif
+  auto predictor = CreatePredictor(config);
+}
+
 TEST(Tensor, CpuShareExternalData) {
   Config config;
   config.SetModel(FLAGS_dirname);
...
...
paddle/fluid/inference/api/paddle_analysis_config.h
@@ -253,6 +253,19 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   ///
   void DisableGpu();
+  ///
+  /// \brief Enable GPU fp16 precision computation, in experimental state.
+  ///
+  /// \param op_list The operator type list.
+  ///
+  void Exp_EnableUseGpuFp16(std::unordered_set<std::string> op_list = {});
+  ///
+  /// \brief A boolean state telling whether the GPU fp16 precision is turned
+  /// on.
+  ///
+  /// \return bool Whether the GPU fp16 precision is turned on.
+  ///
+  bool gpu_fp16_enabled() const { return use_gpu_fp16_; }
   ///
   /// \brief Turn on XPU.
...
@@ -859,6 +872,9 @@ struct PD_INFER_DECL AnalysisConfig {
   int gpu_device_id_{0};
   uint64_t memory_pool_init_size_mb_{100};  // initial size is 100MB.
   bool thread_local_stream_{false};
+  bool use_gpu_fp16_{false};
+  std::unordered_set<std::string> gpu_fp16_disabled_op_types_{
+      "conv2d_fusion", "conv2d", "roll", "strided_slice"};

   bool use_cudnn_{false};
...
...
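The header hunk above, together with the predictor tests earlier, defines the experimental GPU FP16 switch end to end. Below is a minimal usage sketch (not part of the diff): it mirrors the new Predictor test, and the model directory string is a hypothetical placeholder.

// Minimal sketch, assuming the public paddle_infer API exercised by the tests
// above; "./my_model_dir" is a hypothetical path, not taken from the diff.
#include "paddle_inference_api.h"

int main() {
  paddle_infer::Config config;
  config.SetModel("./my_model_dir");  // hypothetical model directory
  config.SwitchIrOptim();             // FP16 pass list only applies with IR optimization on
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  config.EnableUseGpu(100, 0);        // 100 MB initial pool on GPU 0
  config.Exp_EnableUseGpuFp16();      // experimental: selected ops run in FP16
#else
  config.DisableGpu();
#endif
  auto predictor = paddle_infer::CreatePredictor(config);
  return predictor != nullptr ? 0 : 1;
}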
paddle/fluid/inference/api/paddle_pass_builder.cc
...
@@ -172,6 +172,40 @@ void GpuPassStrategy::EnableCUDNN() {
   use_cudnn_ = true;
 }

+void GpuPassStrategy::Exp_EnableUseGpuFp16() {
+  passes_.assign({
+    "is_test_pass",                               //
+    "simplify_with_basic_ops_pass",               //
+    "conv_bn_fuse_pass",                          //
+    "conv_eltwiseadd_bn_fuse_pass",               //
+    "embedding_eltwise_layernorm_fuse_pass",      //
+    "multihead_matmul_fuse_pass_v2",              //
+    "gpu_cpu_squeeze2_matmul_fuse_pass",          //
+    "gpu_cpu_reshape2_matmul_fuse_pass",          //
+    "gpu_cpu_flatten2_matmul_fuse_pass",          //
+    "gpu_cpu_map_matmul_v2_to_mul_pass",          //
+    "gpu_cpu_map_matmul_v2_to_matmul_pass",       //
+    "gpu_cpu_map_matmul_to_mul_pass",             //
+    // "fc_fuse_pass",                            //
+    "fc_elementwise_layernorm_fuse_pass",         //
+#if CUDNN_VERSION >= 7100  // To run conv_fusion, the version of cudnn must be
+                           // guaranteed at least v7
+// cudnn8.0 has memory leak problem in conv + eltwise + act, so we
+// disable the pass.
+#if !(CUDNN_VERSION >= 8000 && CUDNN_VERSION < 8100)
+    "conv_elementwise_add_act_fuse_pass",         //
+    "conv_elementwise_add2_act_fuse_pass",        //
+#endif
+    "conv_elementwise_add_fuse_pass",             //
+#endif                                            //
+    "transpose_flatten_concat_fuse_pass",         //
+    "mixed_precision_configure_pass",             //
+    "runtime_context_cache_pass"                  //
+  });
+  use_gpu_fp16_ = true;
+}
+
 void GpuPassStrategy::EnableMKLDNN() {
   LOG(ERROR) << "GPU not support MKLDNN yet";
 }
...
paddle/fluid/inference/api/paddle_pass_builder.h
...
@@ -125,6 +125,9 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
   /// \brief Enable the use of cuDNN kernel.
   virtual void EnableCUDNN() {}

+  /// \brief Enable use gpu fp16 kernel.
+  virtual void Exp_EnableUseGpuFp16() {}
+
   /// \brief Enable the use of MKLDNN.
   /// The MKLDNN control exists in both CPU and GPU mode, because there can
   /// still be some CPU kernels running in GPU mode.
...
@@ -140,6 +143,10 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
   /// \return A bool variable implying whether we are in gpu mode.
   bool use_gpu() const { return use_gpu_; }

+  /// \brief Check if we are using gpu fp16 kernel.
+  /// \return A bool variable implying whether we are in gpu fp16 mode.
+  bool use_gpu_fp16() const { return use_gpu_fp16_; }
+
   /// \brief Check if we are using xpu.
   /// \return A bool variable implying whether we are in xpu mode.
   bool use_xpu() const { return use_xpu_; }
...
@@ -162,6 +169,7 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
   bool use_npu_{false};
   bool use_ipu_{false};
   bool use_mkldnn_{false};
+  bool use_gpu_fp16_{false};
   /// \endcond
 };
...
@@ -223,6 +231,9 @@ class PD_INFER_DECL GpuPassStrategy : public PassStrategy {
   /// \brief Enable the use of cuDNN kernel.
   void EnableCUDNN() override;

+  /// \brief Enable the use of gpu fp16 kernel.
+  void Exp_EnableUseGpuFp16() override;
+
   /// \brief Not supported in GPU mode yet.
   void EnableMKLDNN() override;
...
@@ -238,6 +249,7 @@ class PD_INFER_DECL GpuPassStrategy : public PassStrategy {
  protected:
   /// \cond Protected
   bool use_cudnn_{false};
+  bool use_gpu_fp16_{false};
   /// \endcond
 };
...
paddle/fluid/operators/fake_quantize_op.cu
...
@@ -28,13 +28,14 @@ __global__ void FindAbsMaxKernel(const T* in, const int n, T* out) {
   extern __shared__ char* shared_max_data_tmp[];
   auto shared_max_data = reinterpret_cast<T*>(shared_max_data_tmp);
   if (gridDim.x > 1) {
-    shared_max_data[tid] = T(0);
+    T local_max_data = T(0);
     for (int i = bid; i < n; i += blockDim.x * gridDim.x) {
       T tmp = abs(in[i]);
-      if (tmp > shared_max_data[tid]) {
-        shared_max_data[tid] = tmp;
+      if (tmp > local_max_data) {
+        local_max_data = tmp;
       }
     }
+    shared_max_data[tid] = local_max_data;
   } else {
     if (bid < n) {
       shared_max_data[tid] = abs(in[bid]);
...
@@ -83,13 +84,14 @@ __global__ void FindChannelAbsMaxKernelQuantAxis0(const T* in, const int n,
   int channel_size = n / c;
   const T* in_c = in + blockIdx.x * channel_size;
   extern __shared__ T shared_max_data[];
-  shared_max_data[tid] = T(0);
+  T local_max_data = T(0);
   for (int i = tid; i < channel_size; i += blockDim.x) {
     T tmp = fabs(in_c[i]);
-    if (tmp > shared_max_data[tid]) {
-      shared_max_data[tid] = tmp;
+    if (tmp > local_max_data) {
+      local_max_data = tmp;
     }
   }
+  shared_max_data[tid] = local_max_data;
   __syncthreads();
   for (int i = blockDim.x / 2; i > 0; i >>= 1) {
     if (tid < i && (shared_max_data[tid] < shared_max_data[tid + i])) {
...
@@ -113,13 +115,14 @@ __global__ void FindChannelAbsMaxKernelQuantAxis1(const T* in, const int n,
   int tid = threadIdx.x;
   int bid = blockIdx.x;
   const T* in_current = in + tid * cout_wh_size + bid * wh_size;
-  shared_max_data[tid] = T(0);
+  T local_max_data = T(0);
   for (int i = 0; i < wh_size; i++) {
     T tmp = fabs(in_current[i]);
-    if (tmp > shared_max_data[tid]) {
-      shared_max_data[tid] = tmp;
+    if (tmp > local_max_data) {
+      local_max_data = tmp;
     }
   }
+  shared_max_data[tid] = local_max_data;
   __syncthreads();

   int len = blockDim.x;
...
@@ -404,6 +407,19 @@ struct FindRangeAbsMaxFunctor<platform::CUDADeviceContext, T> {
   }
 };

+template <typename T>
+__global__ void FindMovingAverageAbsMaxKernel(const T* in_state,
+                                              const T* in_accum,
+                                              const T* cur_scale, const T rate,
+                                              T* out_state, T* out_accum,
+                                              T* out_scale) {
+  T state = rate * (*in_state) + T(1.0f);
+  T accum = rate * (*in_accum) + (*cur_scale);
+  *out_state = state;
+  *out_accum = accum;
+  *out_scale = accum / state;
+}
+
 template struct FindRangeAbsMaxFunctor<platform::CUDADeviceContext, float>;

 template <typename T>
...
@@ -415,29 +431,14 @@ struct FindMovingAverageAbsMaxFunctor<platform::CUDADeviceContext, T> {
                   framework::Tensor* out_accum, framework::Tensor* out_scale) {
     const auto gpu_place = ctx.GetPlace();

-    T accum;
-    T state;
-    T scale;
-    memory::Copy(platform::CPUPlace(), &accum, gpu_place, in_accum.data<T>(),
-                 sizeof(T), ctx.stream());
-    memory::Copy(platform::CPUPlace(), &state, gpu_place, in_state.data<T>(),
-                 sizeof(T), ctx.stream());
-    memory::Copy(platform::CPUPlace(), &scale, gpu_place, cur_scale, sizeof(T),
-                 ctx.stream());
-    ctx.Wait();
     T rate_t = static_cast<T>(rate);
-    state = rate_t * state + static_cast<T>(1.0);
-    accum = rate_t * accum + scale;
-    scale = accum / state;
-    memory::Copy(gpu_place, out_accum->mutable_data<T>(gpu_place),
-                 platform::CPUPlace(), &accum, sizeof(T), ctx.stream());
-    memory::Copy(gpu_place, out_state->mutable_data<T>(gpu_place),
-                 platform::CPUPlace(), &state, sizeof(T), ctx.stream());
-    memory::Copy(gpu_place, out_scale->mutable_data<T>(gpu_place),
-                 platform::CPUPlace(), &scale, sizeof(T), ctx.stream());
-    ctx.Wait();
+    T* out_state_data = out_state->mutable_data<T>(gpu_place);
+    T* out_accum_data = out_accum->mutable_data<T>(gpu_place);
+    T* out_scale_data = out_scale->mutable_data<T>(gpu_place);
+
+    FindMovingAverageAbsMaxKernel<T><<<1, 1, 0, ctx.stream()>>>(
+        in_state.data<T>(), in_accum.data<T>(), cur_scale, rate_t,
+        out_state_data, out_accum_data, out_scale_data);
   }
 };
...
paddle/fluid/operators/grid_sampler_op.cc
...
@@ -15,9 +15,13 @@ limitations under the License. */
 #include <memory>
 #include <string>

+#include "paddle/fluid/framework/infershape_utils.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/op_version_registry.h"
 #include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
+#include "paddle/phi/core/infermeta_utils.h"
+#include "paddle/phi/infermeta/backward.h"
+#include "paddle/phi/infermeta/binary.h"

 namespace paddle {
 namespace operators {
...
@@ -27,43 +31,6 @@ using Tensor = framework::Tensor;
 class GridSampleOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "GridSampler");
-    OP_INOUT_CHECK(ctx->HasInput("Grid"), "Input", "Grid", "GridSampler");
-    OP_INOUT_CHECK(ctx->HasOutput("Output"), "Output", "Output", "GridSampler");
-
-    auto x_dims = ctx->GetInputDim("X");
-    auto grid_dims = ctx->GetInputDim("Grid");
-    PADDLE_ENFORCE_EQ(x_dims.size(), 4,
-                      platform::errors::InvalidArgument(
-                          "Input(X) of GridSampleOp should be 4-D Tensor, but "
-                          "received X dimension size(%d)",
-                          x_dims.size()));
-    PADDLE_ENFORCE_EQ(grid_dims.size(), 4,
-                      platform::errors::InvalidArgument(
-                          "Input(Grid) of GridSampleOp should be 4-D Tensor, "
-                          "but received X dimension size(%d)",
-                          grid_dims.size()));
-    if (ctx->IsRuntime() || grid_dims[3] > 0) {
-      PADDLE_ENFORCE_EQ(
-          grid_dims[3], 2,
-          platform::errors::InvalidArgument(
-              "Input(Grid) dimension[3] should be 2, but received %d",
-              grid_dims[3]));
-    }
-    if (ctx->IsRuntime()) {
-      PADDLE_ENFORCE_EQ(
-          grid_dims[0], x_dims[0],
-          platform::errors::InvalidArgument(
-              "Input(X) and Input(Grid) dimension[0] should be equal, but "
-              "received X dimension[0](%d) != Grid dimension[0](%d)",
-              x_dims[0], grid_dims[0]));
-    }
-
-    ctx->SetOutputDim("Output",
-                      {x_dims[0], x_dims[1], grid_dims[1], grid_dims[2]});
-    ctx->ShareLoD("X", "Output");
-  }

  protected:
   framework::OpKernelType GetExpectedKernelType(
...
@@ -173,18 +140,6 @@ class GridSampleOpMaker : public framework::OpProtoAndCheckerMaker {
 class GridSampleOpGrad : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")), "Output",
-                   framework::GradVarName("X"), "grid_sampler");
-    auto input_dims = ctx->GetInputDim("X");
-    auto grid_dims = ctx->GetInputDim("Grid");
-    if (ctx->HasOutput(framework::GradVarName("X"))) {
-      ctx->SetOutputDim(framework::GradVarName("X"), input_dims);
-    }
-    if (ctx->HasOutput(framework::GradVarName("Grid"))) {
-      ctx->SetOutputDim(framework::GradVarName("Grid"), grid_dims);
-    }
-  }

  protected:
   framework::OpKernelType GetExpectedKernelType(
...
@@ -224,10 +179,16 @@ class GridSampleGradMaker : public framework::SingleGradOpMaker<T> {
 }  // namespace paddle

 namespace ops = paddle::operators;
+DECLARE_INFER_SHAPE_FUNCTOR(grid_sampler, GridSamplerInferShapeFunctor,
+                            PD_INFER_META(phi::GridSampleBaseInferMeta));
 REGISTER_OPERATOR(grid_sampler, ops::GridSampleOp, ops::GridSampleOpMaker,
                   ops::GridSampleGradMaker<paddle::framework::OpDesc>,
-                  ops::GridSampleGradMaker<paddle::imperative::OpBase>);
-REGISTER_OPERATOR(grid_sampler_grad, ops::GridSampleOpGrad);
+                  ops::GridSampleGradMaker<paddle::imperative::OpBase>,
+                  GridSamplerInferShapeFunctor);
+DECLARE_INFER_SHAPE_FUNCTOR(grid_sampler_grad, GridSamplerGradInferShapeFunctor,
+                            PD_INFER_META(phi::GeneralBinaryGradInferMeta));
+REGISTER_OPERATOR(grid_sampler_grad, ops::GridSampleOpGrad,
+                  GridSamplerGradInferShapeFunctor);

 REGISTER_OP_VERSION(grid_sampler)
     .AddCheckpoint(
...
paddle/fluid/pybind/eager_functions.cc
...
@@ -122,13 +122,33 @@ static PyObject* eager_api_run_backward(PyObject* self, PyObject* args,
   EAGER_TRY
   auto tensors = CastPyArg2VectorOfTensor(PyTuple_GET_ITEM(args, 0), 0);
   auto grad_tensors = CastPyArg2VectorOfTensor(PyTuple_GET_ITEM(args, 1), 1);
-  egr::RunBackward(tensors, grad_tensors,
-                   CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 2), 2));
+  egr::Backward(tensors, grad_tensors,
+                CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 2), 2));
   Py_INCREF(Py_None);
   return Py_None;
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }

+static PyObject* eager_api_run_partial_grad(PyObject* self, PyObject* args,
+                                            PyObject* kwargs) {
+  EAGER_TRY
+  auto tensors = CastPyArg2VectorOfTensor(PyTuple_GET_ITEM(args, 0), 0);
+  auto inputs = CastPyArg2VectorOfTensor(PyTuple_GET_ITEM(args, 1), 1);
+  auto grad_tensors = CastPyArg2VectorOfTensor(PyTuple_GET_ITEM(args, 2), 2);
+  auto retain_graph = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 3), 3);
+  auto create_graph = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 4), 4);
+  auto only_inputs = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 5), 5);
+  auto allow_unused = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 6), 6);
+  auto no_grad_vars = CastPyArg2VectorOfTensor(PyTuple_GET_ITEM(args, 7), 7);
+  std::vector<paddle::experimental::Tensor> result =
+      egr::Grad(tensors, inputs, grad_tensors, retain_graph, create_graph,
+                only_inputs, allow_unused, no_grad_vars);
+  VLOG(1) << " in eager_api_run_partial_grad, after runing egr::Grad";
+  return ToPyObject(result, true /* return_py_none_if_not_initialize */);
+  EAGER_CATCH_AND_THROW_RETURN_NULL
+}
+
 static PyObject* eager_api_tensor_copy(PyObject* self, PyObject* args,
                                        PyObject* kwargs) {
   EAGER_TRY
...
@@ -452,6 +472,9 @@ PyMethodDef variable_functions[] = {
      METH_VARARGS | METH_KEYWORDS, NULL},
     {"run_backward", (PyCFunction)(void (*)(void))eager_api_run_backward,
      METH_VARARGS | METH_KEYWORDS, NULL},
+    {"run_partial_grad",
+     (PyCFunction)(void (*)(void))eager_api_run_partial_grad,
+     METH_VARARGS | METH_KEYWORDS, NULL},
    {"_run_custom_op", (PyCFunction)(void (*)(void))eager_api_run_costum_op,
      METH_VARARGS | METH_KEYWORDS, NULL},
    {"tensor_copy", (PyCFunction)(void (*)(void))eager_api_tensor_copy,
...
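For orientation, the sketch below shows the call shape that the new eager_api_run_partial_grad binding forwards to egr::Grad. It is a hedged illustration: the flag comments reflect the usual paddle.grad semantics rather than anything stated in this diff, and the declaration is assumed to live in paddle/fluid/eager/backward.h.

// Hedged sketch of the egr::Grad call wired up by the binding above.
#include <vector>

#include "paddle/fluid/eager/backward.h"  // assumed location of egr::Grad

std::vector<paddle::experimental::Tensor> PartialGradSketch(
    const std::vector<paddle::experimental::Tensor>& outputs,
    const std::vector<paddle::experimental::Tensor>& inputs,
    const std::vector<paddle::experimental::Tensor>& output_grads,
    const std::vector<paddle::experimental::Tensor>& no_grad_vars) {
  bool retain_graph = false;  // all four flags are forwarded as-is by the binding
  bool create_graph = false;
  bool only_inputs = true;
  bool allow_unused = false;
  // Argument order matches eager_api_run_partial_grad above.
  return egr::Grad(outputs, inputs, output_grads, retain_graph, create_graph,
                   only_inputs, allow_unused, no_grad_vars);
}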
paddle/fluid/pybind/eager_method.cc
...
@@ -226,6 +226,19 @@ static PyObject* tensor_method__copy_to(TensorObject* self, PyObject* args,
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }

+static PyObject* tensor_method_cpu(TensorObject* self, PyObject* args,
+                                   PyObject* kwargs) {
+  EAGER_TRY
+  auto cp_tensor =
+      self->tensor.copy_to(phi::TransToPhiBackend(phi::CPUPlace()), true);
+  egr::EagerUtils::autograd_meta(&cp_tensor)->SetStopGradient(true);
+  egr::EagerUtils::autograd_meta(&cp_tensor)
+      ->SetPersistable(
+          egr::EagerUtils::autograd_meta(&(self->tensor))->Persistable());
+  return ToPyObject(cp_tensor);
+  EAGER_CATCH_AND_THROW_RETURN_NULL
+}
+
 static PyObject* tensor_method_reconstruct_from_(TensorObject* self,
                                                  PyObject* args,
                                                  PyObject* kwargs) {
...
@@ -264,7 +277,7 @@ static PyObject* tensor_method_copy_(TensorObject* self, PyObject* args,
         egr::EagerUtils::autograd_meta(&(src_tensor))->Persistable());
   }

-  self->tensor.copy_(src_tensor, blocking);
+  self->tensor.copy_(src_tensor, self->tensor.inner_place(), blocking);

   VLOG(6) << "Finish Copy Tensor " << src_tensor.name() << " to "
           << self->tensor.name();
...
paddle/fluid/pybind/eager_properties.cc
...
@@ -96,7 +96,7 @@ int tensor_properties_set_grad(TensorObject* self, PyObject* value,
                         "Detected NULL grad"
                         "Please check if you have manually cleared"
                         "the grad inside autograd_meta"));
-  grad->copy_(src, true);
+  grad->copy_(src, self->tensor.inner_place(), true);
   return 0;
   EAGER_CATCH_AND_THROW_RETURN_ZERO
 }
...
paddle/fluid/pybind/eager_utils.cc
...
@@ -492,20 +492,26 @@ PyObject* ToPyObject(const std::vector<double>& value) {
   return result;
 }

-PyObject* ToPyObject(const std::vector<paddle::experimental::Tensor>& value) {
+PyObject* ToPyObject(const std::vector<paddle::experimental::Tensor>& value,
+                     bool return_py_none_if_not_initialize) {
   PyObject* result = PyList_New((Py_ssize_t)value.size());

   for (size_t i = 0; i < value.size(); i++) {
-    PyObject* obj = p_tensor_type->tp_alloc(p_tensor_type, 0);
-    if (obj) {
-      auto v = reinterpret_cast<TensorObject*>(obj);
-      new (&(v->tensor)) paddle::experimental::Tensor();
-      v->tensor = value[i];
+    if (!value[i].initialized() && return_py_none_if_not_initialize) {
+      Py_INCREF(Py_None);
+      PyList_SET_ITEM(result, static_cast<Py_ssize_t>(i), Py_None);
     } else {
-      PADDLE_THROW(platform::errors::Fatal(
-          "tp_alloc return null, can not new a PyObject."));
+      PyObject* obj = p_tensor_type->tp_alloc(p_tensor_type, 0);
+      if (obj) {
+        auto v = reinterpret_cast<TensorObject*>(obj);
+        new (&(v->tensor)) paddle::experimental::Tensor();
+        v->tensor = value[i];
+      } else {
+        PADDLE_THROW(platform::errors::Fatal(
+            "tp_alloc return null, can not new a PyObject."));
+      }
+      PyList_SET_ITEM(result, static_cast<Py_ssize_t>(i), obj);
     }
-    PyList_SET_ITEM(result, static_cast<Py_ssize_t>(i), obj);
   }

   return result;
...
paddle/fluid/pybind/eager_utils.h
...
@@ -68,7 +68,8 @@ PyObject* ToPyObject(const std::vector<int>& value);
 PyObject* ToPyObject(const std::vector<int64_t>& value);
 PyObject* ToPyObject(const std::vector<float>& value);
 PyObject* ToPyObject(const std::vector<double>& value);
-PyObject* ToPyObject(const std::vector<paddle::experimental::Tensor>& value);
+PyObject* ToPyObject(const std::vector<paddle::experimental::Tensor>& value,
+                     bool return_py_none_if_not_initialize = false);
 PyObject* ToPyObject(const platform::Place& value);
 PyObject* ToPyObject(const framework::LoDTensor* value);
 PyObject* ToPyObject(const paddle::framework::proto::VarType::Type& dtype);
...
paddle/fluid/pybind/inference_api.cc
...
@@ -551,6 +551,9 @@ void BindAnalysisConfig(py::module *m) {
       .def("params_file", &AnalysisConfig::params_file)
       .def("enable_use_gpu", &AnalysisConfig::EnableUseGpu,
           py::arg("memory_pool_init_size_mb"), py::arg("device_id") = 0)
+      .def("exp_enable_use_gpu_fp16", &AnalysisConfig::Exp_EnableUseGpuFp16,
+           py::arg("gpu_fp16_disabled_op_types") =
+               std::unordered_set<std::string>({}))
      .def("enable_xpu", &AnalysisConfig::EnableXpu,
           py::arg("l3_workspace_size") = 16 * 1024 * 1024,
           py::arg("locked") = false, py::arg("autotune") = true,
...
paddle/infrt/CMakeLists.txt
...
@@ -3,12 +3,22 @@ if (NOT WITH_INFRT)
 endif()

 option(INFRT_WITH_PHI "Compile INFRT with PHI" ON)
+option(INFRT_WITH_GPU "Compile INFRT with GPU" OFF)
+option(INFRT_WITH_TRT "Compile INFRT with TensorRT" OFF)

 #TODO(xiaowei) remove fluid
 include_directories(${PADDLE_SOURCE_DIR}/paddle/fluid/platform)

 if (INFRT_WITH_PHI)
   add_definitions("-DINFRT_WITH_PHI")
+
+  # TODO(wilber): Now Infrt gpu/trt depends on phi's components, Modify compile dependency options later.
+  if (INFRT_WITH_GPU)
+    add_definitions("-DINFRT_WITH_GPU")
+    if (INFRT_WITH_TRT)
+      add_definitions("-DINFRT_WITH_TRT")
+    endif()
+  endif()
 endif()

 # compile flags
...
@@ -92,7 +102,6 @@ set(infrt_mlir_incs
     test_kernels_inc
     tensor_shape_inc
     dense_tensor_inc
-    pd_ops_inc
     pd_extra_ops_inc
     trt_ops_inc
     )
...
@@ -106,6 +115,9 @@ if (INFRT_WITH_PHI)
 endif()

 cc_library(infrt SHARED SRCS ${infrt_src} DEPS glog boost ${mlir_libs} ${phi_libs} paddle_framework_proto infrt_naive)
+if (INFRT_WITH_TRT)
+  target_link_libraries(infrt infrt_trt)
+endif()
 cc_library(infrt_static SRCS ${infrt_src} DEPS glog boost ${mlir_libs} ${phi_libs} paddle_framework_proto)
 add_dependencies(infrt ${infrt_mlir_incs} mlir-headers)
...
paddle/infrt/backends/host/phi_allocator.h
...
@@ -13,6 +13,10 @@ limitations under the License. */
 #include "paddle/phi/core/allocator.h"

+#ifdef INFRT_WITH_GPU
+#include <cuda_runtime.h>
+#endif
+
 namespace infrt {
 namespace backends {
...
@@ -29,5 +33,22 @@ class CpuPhiAllocator : public phi::Allocator {
   }
 };

+#ifdef INFRT_WITH_GPU
+// TODO(wilber): Just for demo test. we need a more efficient gpu allocator.
+class GpuPhiAllocator : public phi::Allocator {
+ public:
+  static void deleter(phi::Allocation* ptr) { cudaFree(ptr->ptr()); }
+
+  AllocationPtr Allocate(size_t bytes_size) {
+    void* ptr;
+    cudaMalloc(&ptr, bytes_size);
+    return AllocationPtr(
+        new phi::Allocation(ptr, bytes_size,
+                            phi::Place(phi::AllocationType::GPU)),
+        deleter);
+  }
+};
+#endif
+
 }  // namespace backends
 }  // namespace infrt
paddle/infrt/backends/host/phi_context.h
...
@@ -13,6 +13,7 @@ limitations under the License. */
 #include "paddle/infrt/backends/host/phi_allocator.h"
 #include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/backends/gpu/gpu_context.h"

 namespace infrt {
 namespace backends {
...
@@ -31,5 +32,16 @@ class CpuPhiContext : public phi::CPUContext {
   std::unique_ptr<phi::Allocator> alloc_{std::make_unique<CpuPhiAllocator>()};
 };

+class GpuPhiContext : public phi::GPUContext {
+ public:
+  using Base = phi::GPUContext;
+  using phi::GPUContext::SetStream;
+  using phi::GPUContext::SetEigenDevice;
+  using phi::GPUContext::SetBlasHandle;
+  using phi::GPUContext::SetDnnHandle;
+  using phi::GPUContext::SetSolverHandle;
+  using phi::GPUContext::SetSparseHandle;
+};
+
 }  // namespace backends
 }  // namespace infrt
paddle/infrt/backends/tensorrt/test_trt_engine.cc
...
@@ -37,9 +37,9 @@ namespace infrt {
 namespace backends {
 namespace tensorrt {

-const char* model_input = "model_input";
-const char* model_output = "model_output1";
-const char* model_output2 = "model_output2";
+const char* model_input = "input_0";
+const char* model_output = "output_0";
+const char* model_output2 = "output_1";

 TrtUniquePtr<nvinfer1::INetworkDefinition> ConstructNetwork(
     nvinfer1::IBuilder* builder, nvinfer1::Dims dims, bool is_static_shape) {
...
@@ -122,27 +122,26 @@ TEST(trt, run_static) {
   std::unordered_map<std::string, phi::DenseTensor*> inputs;
   inputs.emplace(std::make_pair(model_input, &input));
-  phi::DenseTensor output, output2;
-  std::unordered_map<std::string, phi::DenseTensor*> outputs;
-  outputs.emplace(std::make_pair(model_output, &output));
-  outputs.emplace(std::make_pair(model_output2, &output2));
-  static_trt_engine.SetUpInference(inference_options, inputs, &outputs);
+  static_trt_engine.PrepareOutputHandle("output_0");
+  static_trt_engine.PrepareOutputHandle("output_1");
+  static_trt_engine.SetUpInference(inference_options, inputs);
   static_trt_engine.GetEngineInfo();
   static_trt_engine.Run(context);

+  phi::DenseTensor* output0 = static_trt_engine.GetOutput("output_0");
+  phi::DenseTensor* output1 = static_trt_engine.GetOutput("output_1");
   std::vector<float> output_data1(inference_options.batch * 1 * 28 * 28, 0);
   std::vector<float> output_data2(inference_options.batch * 2 * 28 * 28, 0);
   paddle::memory::Copy(phi::CPUPlace(),
                        output_data1.data(),
                        place,
-                       output.data<float>(),
+                       output0->data<float>(),
                        sizeof(float) * output_data1.size(),
                        context.stream());
   paddle::memory::Copy(phi::CPUPlace(),
                        output_data2.data(),
                        place,
-                       output2.data<float>(),
+                       output1->data<float>(),
                        sizeof(float) * output_data2.size(),
                        context.stream());
   cudaStreamSynchronize(context.stream());
...
@@ -208,27 +207,27 @@ TEST(trt, run_dynamic) {
                        context.stream());

   std::unordered_map<std::string, phi::DenseTensor*> inputs;
-  std::unordered_map<std::string, phi::DenseTensor*> outputs;
   inputs.emplace(std::make_pair(model_input, &input));
-  outputs.emplace(std::make_pair(model_output, &output));
-  outputs.emplace(std::make_pair(model_output2, &output2));
-  engine.SetUpInference(inference_options, inputs, &outputs);
+  engine.PrepareOutputHandle("output_0");
+  engine.PrepareOutputHandle("output_1");
+  engine.SetUpInference(inference_options, inputs);
   engine.GetEngineInfo();
   engine.Run(context);

+  phi::DenseTensor* output0 = engine.GetOutput("output_0");
+  phi::DenseTensor* output1 = engine.GetOutput("output_1");
   std::vector<float> output_data1(inference_options.batch * 1 * 16 * 16, 0);
   std::vector<float> output_data2(inference_options.batch * 2 * 16 * 16, 0);
   paddle::memory::Copy(phi::CPUPlace(),
                        output_data1.data(),
                        place,
-                       output.data<float>(),
+                       output0->data<float>(),
                        sizeof(float) * output_data1.size(),
                        context.stream());
   paddle::memory::Copy(phi::CPUPlace(),
                        output_data2.data(),
                        place,
-                       output2.data<float>(),
+                       output1->data<float>(),
                        sizeof(float) * output_data2.size(),
                        context.stream());
   cudaStreamSynchronize(context.stream());
...
paddle/infrt/backends/tensorrt/trt_engine.cc
...
@@ -21,6 +21,7 @@
 #include "paddle/phi/backends/dynload/tensorrt.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
 #include "paddle/phi/core/ddim.h"
+#include "paddle/phi/core/dense_tensor.h"

 namespace infrt {
 namespace backends {
...
@@ -235,10 +236,20 @@ bool TrtEngine::SetupNetworkAndConfig(const BuildOptions& build,
   return true;
 }

+void TrtEngine::PrepareOutputHandle(const std::string& out_name) {
+  phi::DenseTensor t;
+  outputs_.emplace(out_name, t);
+}
+
+phi::DenseTensor* TrtEngine::GetOutput(const std::string& name) {
+  return &outputs_[name];
+}
+
+size_t TrtEngine::GetOutputNum() const { return outputs_.size(); }
+
 bool TrtEngine::SetUpInference(
     const InferenceOptions& inference,
-    const std::unordered_map<std::string, phi::DenseTensor*>& inputs,
-    std::unordered_map<std::string, phi::DenseTensor*>* outputs) {
+    const std::unordered_map<std::string, phi::DenseTensor*>& inputs) {
   // TODO(wilber): now only create one exec_context
   FreshDeviceId();
   CHECK(engine_ != nullptr);
...
@@ -252,10 +263,10 @@ bool TrtEngine::SetUpInference(
     bindings_.front()->AddBinding(
         bind_index, it.first, true, it.second, nvinfer1::DataType::kFLOAT);
   }
-  for (auto& it : *outputs) {
+  for (auto& it : outputs_) {
     const int bind_index = engine_->getBindingIndex(it.first.c_str());
     bindings_.front()->AddBinding(
-        bind_index, it.first, false, it.second, nvinfer1::DataType::kFLOAT);
+        bind_index, it.first, false, &it.second, nvinfer1::DataType::kFLOAT);
   }

   return true;
...
@@ -290,11 +301,13 @@ void TrtEngine::StaticRun(const phi::GPUContext& ctx) {
     const int bind_index = engine_->getBindingIndex(bind.name.c_str());
     std::vector<int32_t> ddim;
     auto dims = engine_->getBindingDimensions(bind_index);
+    CHECK_NE(runtime_batch, -1) << "runtime_batch should not be -1.";
     ddim.push_back(runtime_batch);
     for (int i = 0; i < dims.nbDims; ++i) {
       ddim.push_back(dims.d[i]);
     }
     bind.buffer->Resize(phi::make_ddim(ddim));
+    // TODO(wilber): now only support float output.
     ctx.Alloc<float>(bind.buffer, sizeof(float) * bind.buffer->numel());
     buffers[bind_index] = static_cast<void*>(bind.buffer->data<float>());
   }
...
paddle/infrt/backends/tensorrt/trt_engine.h
...
@@ -81,11 +81,17 @@ class TrtEngine {
   // TODO(wilber): How to support multiple execution contexts?
   bool SetUpInference(
       const InferenceOptions& inference,
-      const std::unordered_map<std::string, phi::DenseTensor*>& inputs,
-      std::unordered_map<std::string, phi::DenseTensor*>* outputs);
+      const std::unordered_map<std::string, phi::DenseTensor*>& inputs);

   void GetEngineInfo();

+  void PrepareOutputHandle(const std::string& out_name);
+
+  // TODO(wilber): The output tensor names are: output_0, output_1, ...
+  phi::DenseTensor* GetOutput(const std::string&);
+
+  size_t GetOutputNum() const;
+
  private:
   void FreshDeviceId();
...
@@ -112,6 +118,7 @@ class TrtEngine {
   std::vector<std::unique_ptr<Bindings>> bindings_;
   int device_id_{0};
   bool is_dynamic_shape_{false};
+  std::unordered_map<std::string, phi::DenseTensor> outputs_;
 };

 }  // namespace tensorrt
...
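Taken together with trt_engine.cc above, these header changes make TrtEngine own its output tensors. The sketch below shows the new calling sequence; it is an illustration assembled from the updated test, with construction of the engine, InferenceOptions, and the GPU context elided rather than taken from this diff, and the exact Run signature assumed from trt_engine.cc.

// Hedged sketch of the reworked output flow: outputs are registered by name
// before SetUpInference and fetched back after Run. Names follow the test.
#include <string>
#include <unordered_map>

#include "paddle/infrt/backends/tensorrt/trt_engine.h"

namespace infrt {
namespace backends {
namespace tensorrt {

void RunWithOwnedOutputs(TrtEngine* engine, const InferenceOptions& options,
                         const phi::GPUContext& context,
                         phi::DenseTensor* input) {
  std::unordered_map<std::string, phi::DenseTensor*> inputs;
  inputs.emplace("input_0", input);

  engine->PrepareOutputHandle("output_0");  // engine now owns the outputs
  engine->PrepareOutputHandle("output_1");
  engine->SetUpInference(options, inputs);  // no outputs map any more
  engine->Run(context);

  phi::DenseTensor* output0 = engine->GetOutput("output_0");
  phi::DenseTensor* output1 = engine->GetOutput("output_1");
  (void)output0;
  (void)output1;
}

}  // namespace tensorrt
}  // namespace backends
}  // namespace infrt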
paddle/infrt/dialect/CMakeLists.txt
...
@@ -7,16 +7,10 @@ gather_srcs(infrt_src SRCS
     dense_tensor.cc
     mlir_loader.cc
     diagnostic_utils.cc
-    pd_ops.cc
     )

 mlir_tablegen_on(tensor_shape DIALECT ts)
 mlir_tablegen_on(dense_tensor DIALECT dt)
-mlir_tablegen_on(pd_op_base DIALECT pd)
-mlir_tablegen_on(pd_ops)
-mlir_tablegen_on(pd_extra_ops)
-mlir_add_rewriter(rewrite)

 # TODO(Superjomn) add a cmake function cc_executable to ecapsulate the following code
 add_executable(infrtopt opt.cc)
...
@@ -24,10 +18,10 @@ target_link_libraries(infrtopt infrt)
 add_executable(print-ir print_ir.cc)
 target_link_libraries(print-ir infrt ${mlir_libs})
-add_dependencies(print-ir pd_ops_inc)

 cc_test_tiny(test_infrt_mlir_loader SRCS mlir_loader_test.cc DEPS infrt ${MLIR_IR_LIBS})
 add_subdirectory(infrt)
+add_subdirectory(pd)
 add_subdirectory(tensorrt)

 if (INFRT_WITH_PHI)
...
paddle/infrt/dialect/dense_tensor.td
...
@@ -130,7 +130,7 @@ def TensorMapGetTensorOp : DT_Op<"tensor_map_get_tensor", [NoSideEffect]> {
 }

 def TensorMapGetSizeOp : DT_Op<"tensor_map_get_size", [NoSideEffect]> {
-  let summary = "ddt.tensor_map_get_size operation";
+  let summary = "dt.tensor_map_get_size operation";

   let description = [{
     An operation that get the size of a TensorMap.
...
@@ -141,6 +141,32 @@ def TensorMapGetSizeOp : DT_Op<"tensor_map_get_size", [NoSideEffect]> {
   let assemblyFormat = "`(` $map `)` attr-dict `->` type($size)";
 }

+def Infrt_TensorListGetTensorOp : DT_Op<"tensor_list_get_tensor", [NoSideEffect]> {
+  let summary = "dt.tensor_list_get_tensor operation";
+
+  let description = [{
+    An operation that can get a tensor from a TensorList.
+  }];
+
+  let arguments = (ins
+    DenseTensorList:$l,
+    I32Attr:$id
+  );
+  let results = (outs DenseTensor:$output);
+  let verifier = ?;
+}
+
+def TensorListGetSizeOp : DT_Op<"tensor_list_get_size", [NoSideEffect]> {
+  let summary = "dt.tensor_list_get_size operation";
+
+  let description = [{
+    An operation that get the size of a TensorList.
+  }];
+
+  let arguments = (ins DenseTensorList:$map);
+  let results = (outs I32:$size);
+}
+
 def GetTensorShapeOp : DT_Op<"get_tensor_shape", [NoSideEffect]> {
   let summary = "dt.get_tensor_shape operation";
...
paddle/infrt/dialect/infrt/ir/infrt_base.td
...
@@ -89,6 +89,13 @@ def DenseTensorMap : Infrt_Type<"DenseTensorMap"> {
   let parameters = (ins);
 }

+// TODO(wilber): Add !infrt.vec type.
+def DenseTensorList : Infrt_Type<"DenseTensorList"> {
+  let summary = "infrt dense tensor map";
+  let description = [{dense_tensor map}];
+  let parameters = (ins);
+}
+
 // Type Constrait for concrete DenseTensor type.
 class DenseTensor<string target, string precision, string layout> :
     Type<CPred<"$_self == ::infrt::DenseTensorType::get($_self.getContext(), ::infrt::TargetType::"#target#",::infrt::PrecisionType::"#precision#",::infrt::LayoutType::"#layout#")">,
...
paddle/infrt/dialect/infrt/ir/infrt_dialect.cc
...
@@ -138,6 +138,10 @@ mlir::Type InfrtDialect::parseType(::mlir::DialectAsmParser &parser) const {
         parser.getContext(), *targetType, *precisionType, *layoutType);
   }

+  if (keyword == "tensor_list") {
+    return infrt::DenseTensorListType::get(parser.getContext());
+  }
+
   if (keyword == "dense_tensor_map") {
     return DenseTensorMapType::get(parser.getContext());
   }
...
@@ -175,6 +179,9 @@ void InfrtDialect::printType(::mlir::Type type,
     return;
   }

+  if (type.isa<infrt::DenseTensorListType>()) {
+    os << "tensor_list";
+  }
   // print DenseTensorType, for example: !infrt.dense_tensor<CPU, FP32, NCHW>
   if (type.isa<DenseTensorMapType>()) {
     os << "dense_tensor_map";
...
paddle/infrt/dialect/infrt/pass/infrt_op_fuse.td
...
@@ -3,7 +3,7 @@
 include "mlir/Interfaces/SideEffectInterfaces.td"
 include "paddle/infrt/dialect/infrt/ir/infrt_ops.td"
-include "paddle/infrt/dialect/pd_ops.td"
+include "paddle/infrt/dialect/pd/ir/pd_ops.td"

 def FuseTensorCastPattern : Pat<
       (Infrt_TensorCastOp (Infrt_TensorCastOp $arg)),
...
paddle/infrt/dialect/infrt/pass/infrt_op_fuse_pass.cc
...
@@ -16,7 +16,7 @@
 #include <mlir/Transforms/GreedyPatternRewriteDriver.h>

 #include "paddle/infrt/dialect/infrt/ir/infrt_dialect.h"
-#include "paddle/infrt/dialect/pd_ops.h"
+#include "paddle/infrt/dialect/pd/ir/pd_ops.h"
 namespace {
 #include "paddle/infrt/dialect/infrt/pass/infrt_op_fuse.cpp.inc"  // NOLINT
...
paddle/infrt/dialect/init_dialects.cc
...
@@ -20,12 +20,13 @@
 #include "paddle/infrt/dialect/infrt/ir/basic_kernels.h"
 #include "paddle/infrt/dialect/infrt/ir/infrt_dialect.h"

-#include "paddle/infrt/dialect/pd_ops.h"
+#include "paddle/infrt/dialect/pd/ir/pd_ops.h"
 #include "paddle/infrt/dialect/phi/ir/infrt_phi_tensor.h"
 #include "paddle/infrt/dialect/phi/ir/phi_base.h"
 #include "paddle/infrt/dialect/phi/ir/phi_kernels.h"
 #include "paddle/infrt/dialect/tensor_shape.h"
+#include "paddle/infrt/dialect/tensorrt/trt_ops.h"

 namespace infrt {
 void registerCinnDialects(mlir::DialectRegistry &registry) {  // NOLINT
...
@@ -37,7 +38,8 @@ void registerCinnDialects(mlir::DialectRegistry &registry) {  // NOLINT
                   phi::PHIDenseTensorDialect,
                   phi::PHICPUKernelDialect,
                   phi::PHIGPUKernelDialect,
-                  phi::PHIDialect
+                  phi::PHIDialect,
+                  infrt::trt::TensorRTDialect
 #endif
       >();
 }
...
paddle/infrt/dialect/pd/CMakeLists.txt (new file, 0 → 100644)
add_subdirectory(common)
add_subdirectory(ir)
add_subdirectory(pass)
paddle/infrt/dialect/pd/common/CMakeLists.txt (new file, 0 → 100644)
core_gather_headers()

gather_srcs(infrt_src SRCS
    )
paddle/infrt/dialect/pd/ir/CMakeLists.txt (new file, 0 → 100644)
core_gather_headers()

gather_srcs(infrt_src SRCS
    pd_ops.cc
    )
add_mlir_dialect(pd_ops pd)
mlir_tablegen_on(pd_extra_ops)
paddle/infrt/dialect/pd_extra_ops.td → paddle/infrt/dialect/pd/ir/pd_extra_ops.td
...
@@ -4,7 +4,7 @@
 include "mlir/Interfaces/InferTypeOpInterface.td"
 include "mlir/Interfaces/LoopLikeInterface.td"
 include "mlir/IR/OpBase.td"
-include "paddle/infrt/dialect/pd_op_base.td"
+include "paddle/infrt/dialect/pd/ir/pd_op_base.td"

 def PD_FusedFC : PD_Op<"FC", [NoSideEffect]> {
   let summary = "Computes the Fully Connected result of two tensors";
...
paddle/infrt/dialect/pd_op_base.td → paddle/infrt/dialect/pd/ir/pd_op_base.td
...
@@ -8,7 +8,7 @@ include "mlir/IR/OpBase.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
 include "paddle/infrt/dialect/infrt/ir/infrt_base.td"

-def PD_Dialect : Dialect {
+def Paddle_Dialect : Dialect {
   let name = "pd";

   let description = [{
...
@@ -16,12 +16,12 @@ def PD_Dialect : Dialect {
     This dialect contains the PaddlePaddle operators.
   }];

+  let hasConstantMaterializer = 1;
   let cppNamespace = "mlir::pd";
 }

 class PD_Op<string mnemonic, list<OpTrait> traits = []> :
-    Op<PD_Dialect, mnemonic, traits>;
+    Op<Paddle_Dialect, mnemonic, traits>;

 class PD_PaddleAttr <string name, string description> :
...
paddle/infrt/dialect/pd_ops.cc → paddle/infrt/dialect/pd/ir/pd_ops.cc
...
@@ -12,29 +12,27 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "paddle/infrt/dialect/pd_ops.h"
+#include "paddle/infrt/dialect/pd/ir/pd_ops.h"

 #include <mlir/IR/Matchers.h>
 #include <mlir/IR/PatternMatch.h>

 #include "paddle/infrt/dialect/infrt/ir/infrt_dialect.h"
+#include "paddle/infrt/dialect/pd/ir/pd_opsDialect.cpp.inc"
 #define GET_OP_CLASSES
-#include "paddle/infrt/dialect/pd_ops.cpp.inc"  // NOLINT
+#include "paddle/infrt/dialect/pd/ir/pd_ops.cpp.inc"  // NOLINT
 #define GET_OP_CLASSES
-#include "paddle/infrt/dialect/pd_extra_ops.cpp.inc"  // NOLINT
+#include "paddle/infrt/dialect/pd/ir/pd_extra_ops.cpp.inc"  // NOLINT

 namespace mlir {
 namespace pd {
-#include "paddle/infrt/dialect/rewrite.cpp.inc"  // NOLINT
-
-PaddleDialect::PaddleDialect(MLIRContext *context)
-    : Dialect("pd", context, TypeID::get<PaddleDialect>()) {
+void PaddleDialect::initialize() {
   addOperations<
 #define GET_OP_LIST
-#include "paddle/infrt/dialect/pd_ops.cpp.inc"  // NOLINT
+#include "paddle/infrt/dialect/pd/ir/pd_ops.cpp.inc"  // NOLINT
       ,
 #define GET_OP_LIST
-#include "paddle/infrt/dialect/pd_extra_ops.cpp.inc"  // NOLINT
+#include "paddle/infrt/dialect/pd/ir/pd_extra_ops.cpp.inc"  // NOLINT
       >();
 }
...
@@ -73,106 +71,5 @@ mlir::OpFoldResult ConstantOp::fold(
     ::llvm::ArrayRef<mlir::Attribute> operands) {
   return value();
 }
-/*
-LogicalResult ElementwiseAdd::inferReturnTypes(
-    MLIRContext *context,
-    Optional<Location> location,
-    ValueRange operands,
-    DictionaryAttr attributes,
-    RegionRange regions,
-    SmallVectorImpl<Type> &inferredReturnTypes) {
-  inferredReturnTypes.push_back(operands[0].getType());
-  return success();
-}
-*/
-void Elementwise_addOp::getCanonicalizationPatterns(
-    mlir::OwningRewritePatternList &results, mlir::MLIRContext *context) {
-  results.insert<FuseMulAdd>(context);
-}
-/*
-mlir::OpFoldResult ElementwiseAdd::fold(
-    llvm::ArrayRef<mlir::Attribute> operands) {
-  if (getElementTypeOrSelf(getType()).isa<FloatType>()) {
-    if (!operands[0] || !operands[1]) return {};
-    DenseElementsAttr lhs = operands[0].dyn_cast<DenseElementsAttr>();
-    DenseElementsAttr rhs = operands[1].dyn_cast<DenseElementsAttr>();
-    if (!lhs || !rhs) return {};
-    ShapedType type = getType().template cast<ShapedType>();
-    if (!type.hasStaticShape()) return {};
-    Type etype = type.getElementType();
-    if (!etype.isa<FloatType>()) return {};
-    SmallVector<APFloat, 6> values;
-    values.reserve(lhs.getNumElements());
-    for (const auto zip :
-         llvm::zip(lhs.getValues<APFloat>(), rhs.getValues<APFloat>())) {
-      values.push_back(
-          std::plus<APFloat>()(std::get<0>(zip), std::get<1>(zip)));
-    }
-    return DenseElementsAttr::get(type, values);
-  }
-  return {};
-}
-
-LogicalResult ElementwiseDiv::inferReturnTypes(
-    MLIRContext *context,
-    Optional<Location> location,
-    ValueRange operands,
-    DictionaryAttr attributes,
-    RegionRange regions,
-    SmallVectorImpl<Type> &inferredReturnTypes) {
-  inferredReturnTypes.push_back(operands[0].getType());
-  return success();
-}
-
-LogicalResult ElementwiseMul::inferReturnTypes(
-    MLIRContext *context,
-    Optional<Location> location,
-    ValueRange operands,
-    DictionaryAttr attributes,
-    RegionRange regions,
-    SmallVectorImpl<Type> &inferredReturnTypes) {
-  inferredReturnTypes.push_back(operands[0].getType());
-  return success();
-}
-
-LogicalResult ElementwiseSub::inferReturnTypes(
-    MLIRContext *context,
-    Optional<Location> location,
-    ValueRange operands,
-    DictionaryAttr attributes,
-    RegionRange regions,
-    SmallVectorImpl<Type> &inferredReturnTypes) {
-  inferredReturnTypes.push_back(operands[0].getType());
-  return success();
-}
-
-LogicalResult MulOp::inferReturnTypes(
-    MLIRContext *context,
-    Optional<Location> location,
-    ValueRange operands,
-    DictionaryAttr attributes,
-    RegionRange regions,
-    SmallVectorImpl<Type> &inferredReturnTypes) {
-  inferredReturnTypes.push_back(operands[0].getType());
-  return success();
-}
-
-void ReluOp::getCanonicalizationPatterns(
-    mlir::OwningRewritePatternList &results, mlir::MLIRContext *context) {
-  results.insert<FuseFCRelu>(context);
-}
-
-void FusedRepeatedFCRelu::getCanonicalizationPatterns(
-    mlir::OwningRewritePatternList &results, mlir::MLIRContext *context) {
-  results.insert<FuseRepeatedFCRelu2>(context);
-}
-
-void BatchNormOp::getCanonicalizationPatterns(
-    mlir::OwningRewritePatternList &results, mlir::MLIRContext *context) {
-  results.insert<FuseBatchNormWithConvPattern>(context);
-}*/
 }  // namespace pd
 }  // namespace mlir
paddle/infrt/dialect/pd_ops.h
→
paddle/infrt/dialect/pd
/ir/pd
_ops.h
浏览文件 @
4be77e53
-// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
...
@@ -14,49 +14,20 @@
 #pragma once
-#include <mlir/Dialect/Traits.h>
-#include <mlir/IR/Attributes.h>
-#include <mlir/IR/Builders.h>
-#include <mlir/IR/BuiltinOps.h>
-#include <mlir/IR/Matchers.h>
-#include <mlir/IR/TypeUtilities.h>
-#include <mlir/Interfaces/CallInterfaces.h>
-#include <mlir/Interfaces/DerivedAttributeOpInterface.h>
-#include <mlir/Interfaces/LoopLikeInterface.h>
+//===----------------------------------------------------------------------===//
+// Dialect
+//===----------------------------------------------------------------------===//
+#include <llvm/ADT/StringMap.h>
+#include <mlir/IR/OpDefinition.h>
 #include <mlir/IR/BuiltinTypes.h>
 #include <mlir/IR/Dialect.h>
 #include <mlir/IR/OpImplementation.h>
 #include <mlir/Interfaces/InferTypeOpInterface.h>
 #include <mlir/Interfaces/SideEffectInterfaces.h>
#include "paddle/infrt/dialect/infrt/ir/infrt_dialect.h"
namespace mlir {
namespace pd {

class PaddleDialect : public Dialect {
 public:
  explicit PaddleDialect(MLIRContext* context);

  static StringRef getDialectNamespace() { return "pd"; }

  /// A hook used to materialize constant values with the given type.
  Operation* materializeConstant(OpBuilder& builder,
                                 Attribute value,
                                 Type type,
                                 Location loc) override;

  Type parseType(DialectAsmParser& parser) const override {
    return Dialect::parseType(parser);
  }
  void printType(Type type, DialectAsmPrinter& printer) const override {
    Dialect::printType(type, printer);
  }
};

}  // namespace pd
}  // namespace mlir
#include "paddle/infrt/dialect/infrt/ir/infrt_dialect.h"
#include "paddle/infrt/dialect/pd/ir/pd_opsDialect.h.inc"
#define GET_OP_CLASSES
-#include "paddle/infrt/dialect/pd_ops.hpp.inc"
+#include "paddle/infrt/dialect/pd/ir/pd_ops.h.inc"
#define GET_OP_CLASSES
-#include "paddle/infrt/dialect/pd_extra_ops.hpp.inc"
+#include "paddle/infrt/dialect/pd/ir/pd_extra_ops.hpp.inc"
paddle/infrt/dialect/pd/pass/CMakeLists.txt
0 → 100644
core_gather_headers()

gather_srcs(infrt_src SRCS
    pd_op_fuse_pass.cc
    )

mlir_add_rewriter(pd_op_fuse)
paddle/infrt/dialect/rewrite.td → paddle/infrt/dialect/pd/pass/pd_op_fuse.td
...
@@ -3,8 +3,8 @@
 include "paddle/infrt/dialect/infrt/ir/infrt_base.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
-include "paddle/infrt/dialect/pd_ops.td"
+include "paddle/infrt/dialect/pd/ir/pd_ops.td"
-include "paddle/infrt/dialect/pd_extra_ops.td"
+include "paddle/infrt/dialect/pd/ir/pd_extra_ops.td"

 //===----------------------------------------------------------------------===//
 // This is to fuse the composition: 'Matmul o ElementwiseAdd' into 'PD_FusedFC'.
...
paddle/infrt/dialect/pd/pass/pd_op_fuse_pass.cc
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/infrt/dialect/pd/pass/pd_op_fuse_pass.h" // NOLINT
#include <mlir/Transforms/GreedyPatternRewriteDriver.h>
#include "paddle/infrt/dialect/pd/ir/pd_ops.h"
namespace {
#include "paddle/infrt/dialect/pd/pass/pd_op_fuse.cpp.inc"  // NOLINT
/*
 * PdOpFusePass.
 */
struct PdOpFusePass
    : public mlir::PassWrapper<PdOpFusePass, mlir::FunctionPass> {
 public:
  ::llvm::StringRef getName() const override { return "PdOpFusePass"; }

  llvm::StringRef getArgument() const override { return "pd-op-fuse"; }

  void runOnFunction() override;
};
// Implementation of the PdOpFusePass.
void PdOpFusePass::runOnFunction() {
  ::mlir::RewritePatternSet patterns(&getContext());
  populateWithGenerated(patterns);
  (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
}

}  // namespace

mlir::PassRegistration<PdOpFusePass> infrt_op_fuse_pass;
paddle/infrt/dialect/pd/pass/pd_op_fuse_pass.h
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <mlir/Pass/Pass.h>
namespace infrt {
/*
 * PdOpFusePass.
 */
std::unique_ptr<mlir::Pass> CreatePdOpFusePass();

}  // namespace infrt
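A minimal usage sketch, not part of this patch: besides the infrtopt --pd-op-fuse flag registered through mlir::PassRegistration above, the pass could be driven programmatically. The snippet assumes CreatePdOpFusePass() is implemented to return a PdOpFusePass instance and that the MLIR revision in use still provides mlir::FuncOp; neither is shown in this diff.

// Hypothetical driver: run pd-op-fuse over every function in a module.
#include <mlir/IR/BuiltinOps.h>
#include <mlir/Pass/PassManager.h>
#include "paddle/infrt/dialect/pd/pass/pd_op_fuse_pass.h"

mlir::LogicalResult RunPdOpFuse(mlir::ModuleOp module) {
  mlir::PassManager pm(module.getContext());
  // PdOpFusePass is a function pass, so nest it under each function.
  pm.addNestedPass<mlir::FuncOp>(infrt::CreatePdOpFusePass());
  return pm.run(module);
}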
paddle/infrt/dialect/phi/ir/infrt_phi_tensor.td
...
@@ -21,8 +21,8 @@ def PHI_DenseTensorDialect : Dialect {
 class PDT_Op<string mnemonic, list<OpTrait> traits = []> : Op<PHI_DenseTensorDialect,
     mnemonic, !listconcat(traits, [PhiOpTrait, IsolatedFromAbove])> {}
-class CreateDenseTensorOp : PDT_Op<"create_dense_tensor", [NoSideEffect]> {
+class CreateDenseTensorOp<string target> : PDT_Op<"create_dense_tensor." # target, [NoSideEffect]> {
   let arguments = (ins Context:$context, I64ArrayAttr:$dims,
                    LayoutAttr:$layout, I64ArrayAttr:$lod, PrecisionAttr:$precision);
   let results = (outs DenseTensor:$output);
...
@@ -51,9 +51,11 @@ class CreateContextOp<string target>
   let results = (outs Context:$output);
 }
-def PDT_CreateDenseTensorOp : CreateDenseTensorOp;
+def PDT_CreateCPUDenseTensorOp : CreateDenseTensorOp<"cpu">;
+def PDT_CreateGPUDenseTensorOp : CreateDenseTensorOp<"gpu">;
 def PDT_FillDenseTensorOp_f32 : FillDenseTensorOp<F32ArrayAttr, "f32">;
 def PDT_CreateCPUContextOp : CreateContextOp<"cpu">;
+def PDT_CreateGPUContextOp : CreateContextOp<"gpu">;
 def PDT_PrintDenseTensor : PrintDenseTensorOp;

 def FakeKernelOp : PDT_Op<"fake_phi_kernel"> {
...
paddle/infrt/dialect/phi/pass/phi_op_convert_pass.cc
...
@@ -32,6 +32,7 @@
 #include "paddle/infrt/dialect/phi/pass/kernel_op_desc.h"
 #include "paddle/infrt/dialect/phi/pass/proto_arg_map_context.h"
 #include "paddle/phi/core/compat/op_utils.h"
+#include "paddle/phi/core/kernel_factory.h"
 #include "paddle/phi/ops/compat/signatures.h"
namespace {
...
@@ -94,42 +95,49 @@ void PhiOpConvertPass::convertStage() {
       // Todo: print log
       continue;
     }

-    ::phi::KernelSignature kernel_sign =
-        ::phi::OpUtilsMap::Instance().GetArgumentMappingFn(op_name)(
-            infrt::ProtoArgumentMappingContext(op));
-    // resort input&output according to kernel_sign
-    ::llvm::SmallVector<mlir::Value, 4> inputs, ori_output;
-    ::llvm::SmallVector<mlir::Type, 4> output_types;
-    for (const std::string &str : std::get<0>(kernel_sign.args)) {
-      if (pd_dialect_inputs_info_map_.at(op_name).count(str) == 0) {
-        LOG(ERROR) << "No input info for Op " << op_name << " and argument "
-                   << str;
-        return;
-      }
-      uint8_t index = pd_dialect_inputs_info_map_.at(op_name).at(str);
-      inputs.push_back(op->getOperands()[index]);
-    }
-
-    for (const std::string &str : std::get<2>(kernel_sign.args)) {
-      if (pd_dialect_outputs_info_map_.at(op_name).count(str) == 0) {
-        LOG(ERROR) << "No output info for Op " << op_name << " and argument "
-                   << str;
-        return;
-      }
-      uint8_t index = pd_dialect_outputs_info_map_.at(op_name).at(str);
-      output_types.push_back(op->getResultTypes()[index]);
-      ori_output.push_back(op->getResult(index));
-    }
-
-    auto loc = getFunction().getLoc();
-    builder.setInsertionPoint(op);
-    auto kernel_op = builder.create<infrt::KernelOp>(
-        loc, output_types, inputs, kernel_sign.name, op->getAttrDictionary());
-    for (size_t index = 0; index < ori_output.size(); ++index) {
-      ori_output[index].replaceAllUsesWith(kernel_op.getResult(index));
-    }
+    auto loc = getFunction().getLoc();
+    builder.setInsertionPoint(op);
+    if (phi::KernelFactory::Instance().HasCompatiblePhiKernel(op_name)) {
+      std::string kernel_name = phi::TransToPhiKernelName(op_name);
+      auto kernel_op = builder.create<infrt::KernelOp>(loc,
+                                                       op->getResultTypes(),
+                                                       op->getOperands(),
+                                                       kernel_name,
+                                                       op->getAttrDictionary());
+      op->replaceAllUsesWith(kernel_op.getResults());
+    } else {
+      ::phi::KernelSignature kernel_sign =
+          ::phi::OpUtilsMap::Instance().GetArgumentMappingFn(op_name)(
+              infrt::ProtoArgumentMappingContext(op));
+      // resort input&output according to kernel_sign
+      ::llvm::SmallVector<mlir::Value, 4> inputs, ori_output;
+      ::llvm::SmallVector<mlir::Type, 4> output_types;
+      for (const std::string &str : std::get<0>(kernel_sign.args)) {
+        if (pd_dialect_inputs_info_map_.at(op_name).count(str) == 0) {
+          LOG(ERROR) << "No input info for Op " << op_name << " and argument "
+                     << str;
+          return;
+        }
+        uint8_t index = pd_dialect_inputs_info_map_.at(op_name).at(str);
+        inputs.push_back(op->getOperands()[index]);
+      }
+
+      for (const std::string &str : std::get<2>(kernel_sign.args)) {
+        if (pd_dialect_outputs_info_map_.at(op_name).count(str) == 0) {
+          LOG(ERROR) << "No output info for Op " << op_name << " and argument "
+                     << str;
+          return;
+        }
+        uint8_t index = pd_dialect_outputs_info_map_.at(op_name).at(str);
+        output_types.push_back(op->getResultTypes()[index]);
+        ori_output.push_back(op->getResult(index));
+      }
+
+      auto kernel_op = builder.create<infrt::KernelOp>(
+          loc, output_types, inputs, kernel_sign.name, op->getAttrDictionary());
+      for (size_t index = 0; index < ori_output.size(); ++index) {
+        ori_output[index].replaceAllUsesWith(kernel_op.getResult(index));
+      }
+    }

     CHECK(op->use_empty());
     op->erase();
   }
...
paddle/infrt/dialect/phi/pass/proto_arg_map_context.h
...
@@ -16,7 +16,7 @@ limitations under the License. */
 #include <mlir/IR/Operation.h>
 #include <unordered_map>
-#include "paddle/infrt/dialect/pd_ops_info.h"
+#include "paddle/infrt/dialect/pd/common/pd_ops_info.h"
 #include "paddle/phi/core/compat/arg_map_context.h"

 namespace infrt {
...
paddle/infrt/dialect/tensorrt/pd_lower_to_trt.td
...
@@ -3,7 +3,7 @@
 include "mlir/Interfaces/SideEffectInterfaces.td"
 include "paddle/infrt/dialect/infrt/ir/infrt_base.td"
-include "paddle/infrt/dialect/pd_ops.td"
+include "paddle/infrt/dialect/pd/ir/pd_ops.td"
 include "paddle/infrt/dialect/tensorrt/trt_ops.td"

 def PD2TRT_Matmul_Lower : Pat<
...
paddle/infrt/dialect/tensorrt/trt_graph_fuse_pass.cc
...
@@ -17,11 +17,12 @@
 #include <llvm/ADT/SetVector.h>
 #include <mlir/Analysis/SliceAnalysis.h>
 #include <mlir/IR/Builders.h>
-#include <paddle/infrt/dialect/pd_ops.h>
 #include <list>
 #include <unordered_set>
 #include <vector>
+#include "paddle/infrt/dialect/pd/ir/pd_ops.h"

 namespace infrt {
 namespace trt {
 namespace {
...
paddle/infrt/dialect/tensorrt/trt_graph_split_pass.cc
...
@@ -15,7 +15,7 @@
 #include "paddle/infrt/dialect/tensorrt/trt_graph_split_pass.h"
 #include <mlir/IR/Builders.h>
-#include "paddle/infrt/dialect/pd_ops.h"
+#include "paddle/infrt/dialect/pd/ir/pd_ops.h"

 namespace infrt {
 namespace trt {
...
paddle/infrt/dialect/tensorrt/trt_op_converter_pass.cc
...
@@ -14,7 +14,7 @@
 #include "paddle/infrt/dialect/tensorrt/trt_op_converter_pass.h"
 #include <mlir/IR/Builders.h>
 #include <mlir/Transforms/DialectConversion.h>
-#include "paddle/infrt/dialect/pd_ops.h"
+#include "paddle/infrt/dialect/pd/ir/pd_ops.h"
 #include "paddle/infrt/dialect/tensorrt/trt_dialect_types.h"

 namespace infrt {
...
paddle/infrt/dialect/tensorrt/trt_op_teller_pass.cc
...
@@ -17,7 +17,7 @@
 #include <mlir/IR/Builders.h>
 #include "paddle/infrt/dialect/infrt/ir/basic_kernels.h"
 #include "paddle/infrt/dialect/infrt/ir/infrt_dialect.h"
-#include "paddle/infrt/dialect/pd_ops.h"
+#include "paddle/infrt/dialect/pd/ir/pd_ops.h"

 namespace infrt {
 namespace trt {
...
paddle/infrt/dialect/tensorrt/trt_ops.cc
...
@@ -21,6 +21,10 @@
 #include "paddle/infrt/common/global.h"
 #include "paddle/infrt/dialect/tensorrt/trt_dialect_types.h"
+#include "paddle/infrt/dialect/dense_tensor.h"
+#include "paddle/infrt/dialect/infrt/ir/infrt_dialect.h"
+#include "paddle/infrt/dialect/phi/ir/phi_base.h"

 namespace infrt {
 namespace trt {
...
paddle/infrt/dialect/tensorrt/trt_ops.h
...
@@ -30,7 +30,7 @@
 #include <mlir/Interfaces/SideEffectInterfaces.h>
 #include "paddle/infrt/dialect/infrt/ir/basic_kernels.h"
 #include "paddle/infrt/dialect/infrt/ir/infrt_dialect.h"
-#include "paddle/infrt/dialect/pd_ops.h"
+#include "paddle/infrt/dialect/pd/ir/pd_ops.h"

 namespace infrt {
 namespace trt {
...
paddle/infrt/dialect/tensorrt/trt_ops.td
...
@@ -7,6 +7,8 @@ include "mlir/Interfaces/CallInterfaces.td"
 include "mlir/IR/OpBase.td"
 include "paddle/infrt/dialect/tensorrt/trt_op_base.td"
+include "paddle/infrt/dialect/infrt/ir/infrt_base.td"
+include "paddle/infrt/dialect/phi/ir/infrt_phi_base.td"

 def TRT_CreateEngineOp : TRT_Op<"create_engine", [SingleBlockImplicitTerminator<"::infrt::ReturnOp">]> {
   let summary = "trt CreateEngine Op";
...
@@ -14,8 +16,8 @@ def TRT_CreateEngineOp : TRT_Op<"create_engine", [SingleBlockImplicitTerminator<
     Describe a tensorrt subgraph.
   }];
   let regions = (region SizedRegion<1>:$body);
-  let arguments = (ins Variadic<TRT_Tensor>:$inputs, DefaultValuedAttr<BoolAttr, "true">:$run_once);
+  let arguments = (ins Variadic<DenseTensor>:$inputs, DefaultValuedAttr<BoolAttr, "true">:$run_once);
-  let results = (outs TRT_EngineType:$output);
+  let results = (outs TRT_EngineType:$engine);
 }

 def TRT_ExecuteOp : TRT_Op<"execute", [NoSideEffect]> {
...
@@ -23,8 +25,25 @@ def TRT_ExecuteOp : TRT_Op<"execute", [NoSideEffect]> {
   let description = [{
     Describe a tensorrt runtime.
   }];
-  let arguments = (ins TRT_EngineType:$engine, Variadic<TRT_Tensor>:$inputs);
+  let arguments = (ins TRT_EngineType:$engine, Variadic<DenseTensor>:$inputs);
-  let results = (outs Variadic<TRT_Tensor>:$output);
+  let results = (outs Variadic<DenseTensor>:$output);
 }

+def TRT_EngineComputeOp : TRT_Op<"compute", [NoSideEffect]> {
+  let summary = "trt compute engine";
+  let description = [{
+    execute engine
+  }];
+  let arguments = (ins TRT_EngineType:$engine, Context:$context);
+  let results = (outs DenseTensorList:$outputs);
+}
+
+def TRT_InspectEngineOp : TRT_Op<"inspect_engine", [NoSideEffect]> {
+  let summary = "trt inspect engine";
+  let description = [{
+    Show engine
+  }];
+  let arguments = (ins TRT_EngineType:$engine);
+}
+
 def TRT_ActivationOp : TRT_Op<"Activation", [NoSideEffect]> {
...
@@ -34,11 +53,11 @@ def TRT_ActivationOp : TRT_Op<"Activation", [NoSideEffect]> {
     TensorRT IActivationLayer.
   }];
-  let arguments = (ins TRT_Tensor:$input, SI32Attr:$activation_type,
+  let arguments = (ins DenseTensor:$input, SI32Attr:$activation_type,
                    DefaultValuedAttr<F32Attr, "0.0">:$alpha,
                    DefaultValuedAttr<F32Attr, "0.0">:$beta);
-  let results = (outs TRT_Tensor:$output);
+  let results = (outs DenseTensor:$output);
 }
 def TRT_ElementWiseOp : TRT_Op<"ElementWise", [NoSideEffect]> {
...
@@ -48,9 +67,9 @@ def TRT_ElementWiseOp : TRT_Op<"ElementWise", [NoSideEffect]> {
     TensorRT IElementWiseLayer.
   }];
-  let arguments = (ins TRT_Tensor:$input1, TRT_Tensor:$input2, SI32Attr:$elementwise_operation);
+  let arguments = (ins DenseTensor:$input1, DenseTensor:$input2, SI32Attr:$elementwise_operation);
-  let results = (outs TRT_Tensor:$output);
+  let results = (outs DenseTensor:$output);
 }
 def TRT_MatrixMultiplyOp : TRT_Op<"MatrixMultiply", [NoSideEffect]> {
...
@@ -60,10 +79,10 @@ def TRT_MatrixMultiplyOp : TRT_Op<"MatrixMultiply", [NoSideEffect]> {
     TensorRT IMatrixMultiplyLayer.
   }];
-  let arguments = (ins TRT_Tensor:$input1, BoolAttr:$transpose1,
-                   TRT_Tensor:$input2, BoolAttr:$transpose2);
+  let arguments = (ins DenseTensor:$input1, BoolAttr:$transpose1,
+                   DenseTensor:$input2, BoolAttr:$transpose2);
-  let results = (outs TRT_Tensor:$output);
+  let results = (outs DenseTensor:$output);
 }

 #endif  // TRT_OPS
paddle/infrt/host_context/mlir_exec.cc
...
@@ -33,7 +33,10 @@
 #include "paddle/infrt/dialect/phi/pass/phi_op_convert_pass.h"
 #include "paddle/infrt/kernel/phi/infershaped/infershaped_kernel_launchers.h"
 #include "paddle/infrt/kernel/phi/registry.h"
-#endif
+#if defined(INFRT_WITH_GPU) && defined(INFRT_WITH_TRT)
+#include "paddle/infrt/kernel/tensorrt/registry.h"
+#endif  // INFRT_WITH_GPU && INFRT_WITH_TRT
+#endif  // INFRT_WITH_PHI
static llvm::cl::list<std::string> cl_shared_libs(  // NOLINT
    "shared_libs",
...
@@ -62,6 +65,9 @@ int main(int argc, char** argv) {
 #ifdef INFRT_WITH_PHI
   kernel::RegisterPhiKernels(&registry);
   kernel::RegisterInferShapeLaunchers(&registry);
+#if defined(INFRT_WITH_GPU) && defined(INFRT_WITH_TRT)
+  kernel::RegisterTrtKernels(&registry);
+#endif  // INFRT_WITH_GPU && INFRT_WITH_TRT
 #endif

   // load extra shared library
...
paddle/infrt/host_context/mlir_to_runtime_translate.cc
...
@@ -16,12 +16,14 @@
 #include <llvm/Support/SourceMgr.h>
 #include <mlir/Dialect/StandardOps/IR/Ops.h>
+#include <mlir/IR/BuiltinAttributes.h>
 #include <mlir/IR/BuiltinOps.h>
 #include <mlir/IR/BuiltinTypes.h>
 #include <mlir/IR/Diagnostics.h>
 #include <mlir/IR/OperationSupport.h>
 #include <mlir/Parser.h>
+#include <glog/logging.h>
 #include <iostream>
 #include <memory>
 #include <string>
...
@@ -42,6 +44,13 @@
 #include "paddle/infrt/host_context/value.h"
 #include "paddle/infrt/tensor/tensor_shape.h"

+#ifdef INFRT_WITH_PHI
+#ifdef INFRT_WITH_TRT
+#include "paddle/infrt/kernel/tensorrt/trt_kernels.h"
+#endif
+#include "paddle/phi/core/dense_tensor.h"
+#endif
+
 namespace infrt {
 namespace host_context {
...
@@ -277,33 +286,58 @@ bool MlirToRuntimeTranslator::EmitGeneralOp(
  impl_->cur_op =
      impl_->runtime->NewOpExecutable(op->getName().getStringRef().str());

  VLOG(3) << "processing general op : " << op->getName().getStringRef().str();

  // TODO(wilber): Find a more appropriate way to handle special cases.
  if (op->getName().getStringRef() == "trt.create_engine") {
#ifdef INFRT_WITH_TRT
    auto* symbols = impl_->runtime->symbol_table();
    ::infrt::kernel::tensorrt::MlirOperationWithInfrtSymbol mlir_operation;
    mlir_operation.operation = op;
    mlir_operation.symbol_table = symbols;
    impl_->cur_op->AppendArgument(new Value(mlir_operation));
    // TODO(wilber): how to pass DenseTensor to create_engine op? temporialiy
    // add a naive implement.
    for (int i = 0, e = op->getNumOperands(); i < e; ++i) {
      auto operand = op->getOperand(i);
      if (operand.isa<mlir::BlockArgument>()) {
        mlir::BlockArgument arg = operand.dyn_cast<mlir::BlockArgument>();
        Value* arg_value = GetValue(arg);
        if (arg_value->is_type<phi::DenseTensor>()) {
          impl_->runtime->FeedInArgs(
              std::make_pair(std::to_string(i), ValueRef(arg_value)));
        }
      }
    }
#else
    CHECK(false) << "should not reach here";
#endif
  } else {
    // process operands
    for (int i = 0, e = op->getNumOperands(); i < e; i++) {
      // function argument as value
      auto operand = op->getOperand(i);
      /// if (operand.getKind() == mlir::Value::Kind::BlockArgument) {
      if (operand.isa<mlir::BlockArgument>()) {
        mlir::BlockArgument arg = operand.dyn_cast<mlir::BlockArgument>();
        Value* arg_value = GetValue(arg);
        impl_->cur_op->AppendArgument(arg_value);
        VLOG(3) << "* op mlir operand: " << DumpToString(arg) << " "
                << GetValue(arg);
        continue;
      }

      // normal value
      Value* arg_value = GetValue(operand);
      if (!arg_value) {
        auto upstream_op = operand.getDefiningOp();
        arg_value = GetOpResult(upstream_op);
      }
      CHECK(arg_value) << "No-exist argument value found: "
                       << DumpToString(operand);
      impl_->cur_op->AppendArgument(arg_value);

      VLOG(3) << "* op mlir operand: " << DumpToString(operand) << " "
              << GetValue(operand) << " vs " << arg_value;
    }
  }

  // process attributes
...
@@ -383,33 +417,6 @@ bool MlirToRuntimeTranslator::EmitGeneralOp(
     impl_->cur_op->AppendAttribute(tmp[i]);
   }

-  // process results
-  llvm::SmallVector<Value*, 4> res_values;
-  for (int i = 0, e = op->getNumResults(); i < e; i++) {
-    auto res = op->getResult(i);
-    if (res.getType().isa<::infrt::DenseTensorType>()) {
-      auto r = impl_->value_map.try_emplace(
-          res, ValueRef(new Value{::phi::DenseTensor()}));
-      CHECK(r.second) << "Duplicate add mlir value [" << DumpToString(res)
-                      << "]";
-      res_values.push_back(r.first->second.get());
-    } else {
-      res_values.push_back(AddValue(res));
-    }
-
-    VLOG(3) << "* op mlir res: " << DumpToString(res) << " " << GetValue(res);
-  }
-  impl_->cur_op->SetResults(res_values);
-
-#ifdef INFRT_DEBUG
-  {
-    VLOG(3) << "check result";
-    for (int i = 0; i < impl_->cur_op->frame().GetNumResults(); i++) {
-      VLOG(3) << "+ res value: " << impl_->cur_op->frame().GetResults()[i];
-    }
-  }
-#endif
-
   // process regions, we treat regions as attribute.
   auto num_regions = op->getNumRegions();
   if (num_regions > 0) {
...
@@ -438,6 +445,33 @@ bool MlirToRuntimeTranslator::EmitGeneralOp(
       impl_->cur_op->AppendAttribute(new Value(function));
     }
   }

+  // process results
+  llvm::SmallVector<Value*, 4> res_values;
+  for (int i = 0, e = op->getNumResults(); i < e; i++) {
+    auto res = op->getResult(i);
+    if (res.getType().isa<::infrt::DenseTensorType>()) {
+      auto r = impl_->value_map.try_emplace(
+          res, ValueRef(new Value{::phi::DenseTensor()}));
+      CHECK(r.second) << "Duplicate add mlir value [" << DumpToString(res)
+                      << "]";
+      res_values.push_back(r.first->second.get());
+    } else {
+      res_values.push_back(AddValue(res));
+    }
+
+    VLOG(3) << "* op mlir res: " << DumpToString(res) << " " << GetValue(res);
+  }
+  impl_->cur_op->SetResults(res_values);
+
+#ifdef INFRT_DEBUG
+  {
+    VLOG(3) << "check result";
+    for (int i = 0; i < impl_->cur_op->frame().GetNumResults(); i++) {
+      VLOG(3) << "+ res value: " << impl_->cur_op->frame().GetResults()[i];
+    }
+  }
+#endif
+
   return true;
 }
...
paddle/infrt/host_context/paddle_mlir.cc
...
@@ -15,7 +15,7 @@
 #include "paddle/infrt/host_context/paddle_mlir.h"
 #include "paddle/infrt/dialect/infrt/ir/basic_kernels.h"
 #include "paddle/infrt/dialect/infrt/ir/infrt_dialect.h"
-#include "paddle/infrt/dialect/pd_ops_info.h"
+#include "paddle/infrt/dialect/pd/common/pd_ops_info.h"

 MLIRModelGenImpl::MLIRModelGenImpl()
     : context_(infrt::Global::getMLIRContext()), builder_(context_) {
...
paddle/infrt/host_context/paddle_mlir.h
...
@@ -14,22 +14,22 @@
 #ifndef PADDLE_INFRT_HOST_CONTEXT_PADDLE_MLIR_H_
 #define PADDLE_INFRT_HOST_CONTEXT_PADDLE_MLIR_H_
+#include <llvm/Support/CommandLine.h>
+#include <mlir/Dialect/StandardOps/IR/Ops.h>
+#include <mlir/IR/AsmState.h>
+#include <mlir/IR/Builders.h>
+#include <mlir/IR/BuiltinOps.h>
+#include <mlir/IR/MLIRContext.h>
 #include <fstream>
 #include <iostream>
 #include <string>
-#include "llvm/Support/CommandLine.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/IR/AsmState.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/MLIRContext.h"
 #include "paddle/infrt/common/global.h"
 #include "paddle/infrt/common/string.h"
 #include "paddle/infrt/dialect/dense_tensor.h"
 #include "paddle/infrt/dialect/infrt/ir/basic_kernels.h"
 #include "paddle/infrt/dialect/init_dialects.h"
-#include "paddle/infrt/dialect/pd_ops.h"
+#include "paddle/infrt/dialect/pd/ir/pd_ops.h"
 #include "paddle/infrt/dialect/tensor_shape.h"
 #include "paddle/infrt/paddle/model_parser.h"
...
paddle/infrt/host_context/value.h
...
@@ -24,6 +24,7 @@
 #include "paddle/infrt/common/shared.h"
 #include "paddle/infrt/dialect/infrt/common/types.h"
 #include "paddle/infrt/host_context/function.h"
+#include "paddle/infrt/host_context/symbol_table.h"
 #include "paddle/infrt/support/variant.h"
 #include "paddle/infrt/tensor/dense_host_tensor.h"
 #include "paddle/infrt/tensor/dense_tensor_view.h"
@@ -41,7 +42,15 @@
 #include "paddle/phi/common/scalar_array.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/meta_tensor.h"
-#endif
+#ifdef INFRT_WITH_GPU
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#endif  // INFRT_WITH_GPU
+#ifdef INFRT_WITH_TRT
+#include "paddle/infrt/backends/tensorrt/trt_engine.h"
+#include "paddle/infrt/kernel/tensorrt/trt_kernels.h"
+#endif  // INFRT_WITH_TRT
+#endif  // INFRT_WITH_PHI

 namespace infrt {
 namespace host_context {
...
@@ -72,8 +81,13 @@ using ValueVariantType =
     ::phi::MetaTensor,
     ::phi::DenseTensor,
     backends::CpuPhiContext,
+#ifdef INFRT_WITH_GPU
+    backends::GpuPhiContext,
+    ::phi::GPUContext,
+#endif
     ::phi::CPUContext,
     std::vector<const phi::DenseTensor*>,
+    std::vector<phi::DenseTensor*>,
     paddle::experimental::ScalarBase<phi::DenseTensor>,
     paddle::experimental::ScalarArrayBase<phi::DenseTensor>,
     std::vector<phi::MetaTensor*>,
...
@@ -81,6 +95,10 @@ using ValueVariantType =
     paddle::experimental::Backend,
     paddle::experimental::DataLayout,
     paddle::experimental::DataType,
+#ifdef INFRT_WITH_TRT
+    ::infrt::backends::tensorrt::TrtEngine,
+    ::infrt::kernel::tensorrt::MlirOperationWithInfrtSymbol,
+#endif  // INFRT_WITH_TRT
 #endif
     std::vector<int16_t>,
     std::vector<int32_t>,
...
@@ -120,8 +138,18 @@ class Value : public common::Object {
 #ifdef INFRT_WITH_PHI
   explicit Value(::phi::CPUContext&& x) : data(std::move(x)) {}
   explicit Value(backends::CpuPhiContext&& x) : data(std::move(x)) {}
+#ifdef INFRT_WITH_GPU
+  explicit Value(::phi::GPUContext&& x) : data(std::move(x)) {}
+  explicit Value(backends::GpuPhiContext&& x) : data(std::move(x)) {}
+#endif
   explicit Value(::phi::DenseTensor&& x) : data(std::move(x)) {}
   explicit Value(::phi::MetaTensor&& x) : data(std::move(x)) {}
+#ifdef INFRT_WITH_TRT
+  explicit Value(::infrt::backends::tensorrt::TrtEngine&& x)
+      : data(std::move(x)) {}
+  explicit Value(::infrt::kernel::tensorrt::MlirOperationWithInfrtSymbol x)
+      : data(x) {}
+#endif  // INFRT_WITH_TRT
 #endif

   template <typename T>
...
paddle/infrt/kernel/CMakeLists.txt
 add_subdirectory(phi)
+add_subdirectory(tensorrt)

 core_gather_headers()
...
paddle/infrt/kernel/phi/context_kernels.cc
...
@@ -25,6 +25,16 @@ namespace phi {
   return ctx;
 }

+#ifdef INFRT_WITH_GPU
+::phi::GPUContext CreateGPUContext() {
+  ::phi::GPUContext context;
+  context.PartialInitWithoutAllocator();
+  context.SetAllocator(new ::infrt::backends::GpuPhiAllocator{});
+  context.PartialInitWithAllocator();
+  return context;
+}
+#endif
+
 }  // namespace phi
 }  // namespace kernel
 }  // namespace infrt
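A note on the initialization order used above, with an illustrative sketch that is not part of the patch: the allocator has to be installed between PartialInitWithoutAllocator() and PartialInitWithAllocator(), otherwise the allocator-dependent parts of ::phi::GPUContext are never set up. The snippet assumes an INFRT_WITH_GPU build and only reuses functions that appear in this diff.

#ifdef INFRT_WITH_GPU
#include "paddle/infrt/kernel/phi/context_kernels.h"
#include "paddle/phi/core/dense_tensor.h"

void GpuContextDemo() {
  // Same sequence as CreateGPUContext(): partial init, install allocator,
  // then finish the allocator-dependent initialization.
  ::phi::GPUContext ctx = ::infrt::kernel::phi::CreateGPUContext();

  // The context can now back a device-side tensor allocation.
  ::phi::DenseTensor t(
      const_cast<::phi::Allocator*>(&ctx.GetAllocator()),
      ::phi::DenseTensorMeta(::phi::DataType::FLOAT32,
                             ::phi::make_ddim({1, 3})));
  (void)t;
}
#endif  // INFRT_WITH_GPU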
paddle/infrt/kernel/phi/context_kernels.h
...
@@ -25,6 +25,10 @@ namespace phi {
 ::phi::CPUContext CreateCPUContext();

+#ifdef INFRT_WITH_GPU
+::phi::GPUContext CreateGPUContext();
+#endif
+
 }  // namespace phi
 }  // namespace kernel
 }  // namespace infrt
paddle/infrt/kernel/phi/dense_tensor_kernels.cc
...
@@ -15,6 +15,12 @@
 #include "paddle/infrt/kernel/phi/dense_tensor_kernels.h"
 #include "paddle/infrt/dialect/phi/data_type.h"
 #include "paddle/infrt/kernel/phi/context_kernels.h"
+#include "paddle/phi/backends/all_context.h"
+#include "paddle/phi/common/place.h"
+
+#ifdef INFRT_WITH_GPU
+#include <cuda_runtime.h>
+#endif

 namespace infrt {
 namespace kernel {
...
@@ -34,26 +40,83 @@ namespace phi {
                            {}));
}

::phi::DenseTensor CreateGPUDenseTensor(
    const ::phi::GPUContext& context,
    host_context::Attribute<std::vector<int64_t>> dims,
    host_context::Attribute<std::vector<int64_t>> lod,
    host_context::Attribute<::infrt::LayoutType> layout,
    host_context::Attribute<::infrt::PrecisionType> precision) {
  return ::phi::DenseTensor(
      const_cast<::phi::Allocator*>(&context.GetAllocator()),
      ::phi::DenseTensorMeta(ConvertPrecisionToPhi(precision.get()),
                             ::phi::make_ddim(dims.get()),
                             ConvertLayoutToPhi(layout.get()),
                             {}));
}

void FillDenseTensorF32(::phi::DenseTensor* dense_tensor,
                        host_context::Attribute<std::vector<float>> value) {
  auto place = dense_tensor->place();
  float* a_data = dense_tensor->mutable_data<float>(place);
  if (place.GetType() == ::phi::AllocationType::CPU) {
    for (int64_t i = 0; i < dense_tensor->numel(); ++i) {
      a_data[i] = (value.get())[i];
    }
  } else if (place.GetType() == ::phi::AllocationType::GPU) {
#ifdef INFRT_WITH_GPU
    // TODO(wilber): how to set the stream parameter to copy with stream.
    cudaMemcpy(a_data,
               value.get().data(),
               sizeof(float) * value.get().size(),
               cudaMemcpyHostToDevice);
#endif
  } else {
    llvm_unreachable("temporarily not support other target.");
  }
}

void PrintDenseTensor(::phi::DenseTensor* dense_tensor) {
#ifndef INFRT_WITH_GPU
#define PRINT_META_DATA(PHI_DATATYPE, DTYPE)              \
  case ::phi::DataType::PHI_DATATYPE: {                   \
    DTYPE* data = dense_tensor->data<DTYPE>();            \
    if (dense_tensor->numel() == 0) break;                \
    std::cout << data[0];                                 \
    for (int64_t i = 1; i < dense_tensor->numel(); i++) { \
      std::cout << "," << data[i];                        \
    }                                                     \
    break;                                                \
  }
#else
#define PRINT_META_DATA(PHI_DATATYPE, DTYPE)                      \
  case ::phi::DataType::PHI_DATATYPE: {                           \
    auto place = dense_tensor->place();                           \
    DTYPE* data = dense_tensor->data<DTYPE>();                    \
    if (dense_tensor->numel() == 0) break;                        \
    if (place.GetType() == ::phi::AllocationType::CPU) {          \
      std::cout << data[0];                                       \
      for (int64_t i = 1; i < dense_tensor->numel(); i++) {       \
        std::cout << "," << data[i];                              \
      }                                                           \
    } else if (place.GetType() == ::phi::AllocationType::GPU) {   \
      std::vector<DTYPE> host_data(dense_tensor->numel(), 0);     \
      cudaMemcpy(host_data.data(),                                \
                 data,                                            \
                 sizeof(DTYPE) * dense_tensor->numel(),           \
                 cudaMemcpyDeviceToHost);                         \
      std::cout << host_data[0];                                  \
      for (int64_t i = 1; i < dense_tensor->numel(); i++) {       \
        std::cout << "," << host_data[i];                         \
      }                                                           \
    } else {                                                      \
      llvm_unreachable("temporarily not support other target.");  \
    }                                                             \
    break;                                                        \
  }
#endif

  ::phi::DDim dims = dense_tensor->dims();
  std::cout << "dense_tensor: shape=shape" << dims.to_str() << ","
...
paddle/infrt/kernel/phi/dense_tensor_kernels.h
...
@@ -30,6 +30,13 @@ namespace phi {
                                      host_context::Attribute<::infrt::LayoutType> layout,
                                      host_context::Attribute<::infrt::PrecisionType> precision);

+::phi::DenseTensor CreateGPUDenseTensor(
+    const ::phi::GPUContext& context,
+    host_context::Attribute<std::vector<int64_t>> dims,
+    host_context::Attribute<std::vector<int64_t>> lod,
+    host_context::Attribute<::infrt::LayoutType> layout,
+    host_context::Attribute<::infrt::PrecisionType> precision);
+
 void FillDenseTensorF32(::phi::DenseTensor* dense_tensor,
                         host_context::Attribute<std::vector<float>> values);
 void PrintDenseTensor(::phi::DenseTensor* dense_tensor);
paddle/infrt/kernel/phi/registry.cc
浏览文件 @
4be77e53
...
@@ -35,7 +35,7 @@ void RegisterPhiKernels(host_context::KernelRegistry* registry) {
...
@@ -35,7 +35,7 @@ void RegisterPhiKernels(host_context::KernelRegistry* registry) {
registry
->
AddKernel
(
"phi_dt.create_context.cpu"
,
registry
->
AddKernel
(
"phi_dt.create_context.cpu"
,
INFRT_KERNEL
(
infrt
::
kernel
::
phi
::
CreateCPUContext
));
INFRT_KERNEL
(
infrt
::
kernel
::
phi
::
CreateCPUContext
));
registry
->
AddKernelWithAttrs
(
registry
->
AddKernelWithAttrs
(
"phi_dt.create_dense_tensor"
,
"phi_dt.create_dense_tensor
.cpu
"
,
INFRT_KERNEL
(
infrt
::
kernel
::
phi
::
CreateDenseTensor
),
INFRT_KERNEL
(
infrt
::
kernel
::
phi
::
CreateDenseTensor
),
{
"dims"
,
"lod"
,
"layout"
,
"precision"
});
{
"dims"
,
"lod"
,
"layout"
,
"precision"
});
registry
->
AddKernelWithAttrs
(
registry
->
AddKernelWithAttrs
(
...
@@ -44,6 +44,15 @@ void RegisterPhiKernels(host_context::KernelRegistry* registry) {
...
@@ -44,6 +44,15 @@ void RegisterPhiKernels(host_context::KernelRegistry* registry) {
{
"value"
});
{
"value"
});
registry
->
AddKernel
(
"phi_dt.print_tensor"
,
registry
->
AddKernel
(
"phi_dt.print_tensor"
,
INFRT_KERNEL
(
infrt
::
kernel
::
phi
::
PrintDenseTensor
));
INFRT_KERNEL
(
infrt
::
kernel
::
phi
::
PrintDenseTensor
));
#ifdef INFRT_WITH_GPU
registry
->
AddKernel
(
"phi_dt.create_context.gpu"
,
INFRT_KERNEL
(
infrt
::
kernel
::
phi
::
CreateGPUContext
));
registry
->
AddKernelWithAttrs
(
"phi_dt.create_dense_tensor.gpu"
,
INFRT_KERNEL
(
infrt
::
kernel
::
phi
::
CreateGPUDenseTensor
),
{
"dims"
,
"lod"
,
"layout"
,
"precision"
});
#endif
}
}
}
// namespace kernel
}
// namespace kernel
...
...
paddle/infrt/kernel/tensor_kernels.cc
...
@@ -25,6 +25,10 @@
 #include "paddle/infrt/tensor/tensor_map.h"
 #include "paddle/infrt/tensor/tensor_shape.h"

+#ifdef INFRT_WITH_PHI
+#include "paddle/phi/core/dense_tensor.h"
+#endif
+
 namespace infrt {
 namespace kernel {
 using namespace host_context;  // NOLINT
...
@@ -62,6 +66,20 @@ DenseHostTensor TensorMapGetTensor(TensorMap map, Attribute<std::string> name) {

 int32_t TensorMapGetSize(TensorMap map) { return map.size(); }

+// TODO(wilber): Maybe we should place TensorList type in dt dialect.
+#ifdef INFRT_WITH_PHI
+phi::DenseTensor TensorListGetTensor(std::vector<phi::DenseTensor*> list,
+                                     Attribute<int32_t> idx) {
+  CHECK_LT(idx.get(), static_cast<int>(list.size()))
+      << "idx should less than list size";
+  return *list[idx.get()];
+}
+
+int32_t TensorListGetSize(const std::vector<phi::DenseTensor*>& list) {
+  return list.size();
+}
+#endif
+
 DenseHostTensor ShallowCopyTensor(DenseHostTensor v) { return v; }

 template <typename T>
...
@@ -126,6 +144,14 @@ void RegisterTensorKernels(host_context::KernelRegistry *registry) {
                       INFRT_KERNEL(TensorMapGetTensor));
   registry->AddKernel("dt.tensor_map_get_size", INFRT_KERNEL(TensorMapGetSize));

+  // TensorList related methods.
+#ifdef INFRT_WITH_PHI
+  registry->AddKernel("dt.tensor_list_get_tensor",
+                      INFRT_KERNEL(TensorListGetTensor));
+  registry->AddKernel("dt.tensor_list_get_size",
+                      INFRT_KERNEL(TensorListGetSize));
+#endif
+
   registry->AddKernel("dt.shallow_copy_tensor",
                       INFRT_KERNEL(ShallowCopyTensor));
...
paddle/infrt/kernel/tensorrt/CMakeLists.txt
0 → 100644
if (NOT (INFRT_WITH_PHI AND INFRT_WITH_GPU AND INFRT_WITH_TRT))
  return()
endif()

core_gather_headers()

gather_srcs(infrt_src SRCS
  registry.cc
  trt_kernels.cc
)
paddle/infrt/kernel/tensorrt/registry.cc
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/infrt/kernel/tensorrt/registry.h"
#include "paddle/infrt/host_context/kernel_registry.h"
#include "paddle/infrt/host_context/kernel_utils.h"
#include "paddle/infrt/kernel/tensorrt/trt_kernels.h"
namespace infrt {
namespace kernel {

void RegisterTrtKernels(host_context::KernelRegistry* registry) {
  registry->AddKernel("trt.create_engine",
                      INFRT_KERNEL(tensorrt::CreateTrtEngine));
  registry->AddKernel("trt.inspect_engine",
                      INFRT_KERNEL(tensorrt::PrintTrtLayer));
  registry->AddKernel("trt.compute", INFRT_KERNEL(tensorrt::TrtEngineCompute));
}

}  // namespace kernel
}  // namespace infrt
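Usage sketch, not part of the patch: a host program installs these TensorRT kernels next to the PHI ones before executing an MLIR program, mirroring the mlir_exec.cc change earlier in this commit. The guard macros are assumed to be defined by the build configuration.

// Hypothetical host-side setup: fill one KernelRegistry with both kernel sets.
#include "paddle/infrt/host_context/kernel_registry.h"
#include "paddle/infrt/kernel/phi/registry.h"
#include "paddle/infrt/kernel/tensorrt/registry.h"

void SetUpRegistry(infrt::host_context::KernelRegistry* registry) {
#ifdef INFRT_WITH_PHI
  infrt::kernel::RegisterPhiKernels(registry);
#if defined(INFRT_WITH_GPU) && defined(INFRT_WITH_TRT)
  // Adds "trt.create_engine", "trt.inspect_engine" and "trt.compute".
  infrt::kernel::RegisterTrtKernels(registry);
#endif
#endif
}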
paddle/infrt/kernel/tensorrt/registry.h
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
namespace infrt {
namespace host_context {
struct KernelRegistry;
}  // namespace host_context
}  // namespace infrt

namespace infrt {
namespace kernel {

/**
 * Register all the trt kernels to registry.
 */
void RegisterTrtKernels(host_context::KernelRegistry* registry);

}  // namespace kernel
}  // namespace infrt
paddle/infrt/kernel/tensorrt/trt_kernels.cc
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/infrt/kernel/tensorrt/trt_kernels.h"
#include <string>
#include "NvInfer.h"
#include "NvInferRuntime.h"
#include "NvInferRuntimeCommon.h"
#include "glog/logging.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/Value.h"
#include "paddle/infrt/backends/tensorrt/trt_engine.h"
#include "paddle/infrt/backends/tensorrt/trt_options.h"
#include "paddle/infrt/dialect/tensorrt/trt_ops.h"
#include "paddle/infrt/host_context/symbol_table.h"
#include "paddle/phi/core/dense_tensor.h"
namespace infrt {
namespace kernel {
namespace tensorrt {

::infrt::backends::tensorrt::TrtEngine CreateTrtEngine(
    MlirOperationWithInfrtSymbol
        create_engine_op /*, input_tensors, output_tensors, weights*/) {
  // TODO(wilber): The device_id needs to get from mlir.
  int device_id = 0;
  backends::tensorrt::TrtEngine engine(device_id);

  auto* builder = engine.GetTrtBuilder();
  // TODO(wilber): How to process weights?
  backends::tensorrt::TrtUniquePtr<nvinfer1::INetworkDefinition> network;
  // TODO(wilber): static_shape or dynamic_shape network? The code is just
  // static_shape test.
  network.reset(builder->createNetworkV2(0));

  // TODO(wilber): The build option shoule be fiiled from mlir info.
  backends::tensorrt::BuildOptions options;
  options.max_batch = 4;

  // Parse mlir Region which only has one block.
  mlir::Operation& operation = *create_engine_op.operation;
  auto* symbol_table = create_engine_op.symbol_table;
  CHECK_NOTNULL(symbol_table);

  unsigned int num_regions = operation.getNumRegions();
  CHECK_EQ(num_regions, 1U) << "only support one region case.";
  auto& region = operation.getRegion(0);
  auto& block = region.getBlocks().front();

  llvm::DenseMap<mlir::Value, nvinfer1::ITensor*> map_info;
  std::unordered_map<std::string, phi::DenseTensor*> trt_bind_inputs;

  for (auto index_operand : llvm::enumerate(operation.getOperands())) {
    mlir::Value operand = index_operand.value();
    size_t idx = index_operand.index();

    const std::string input_name = "input_" + std::to_string(idx);
    auto* v = symbol_table->GetValue(std::to_string(idx));
    CHECK_NOTNULL(v);
    auto* t = &v->get<phi::DenseTensor>();
    trt_bind_inputs[input_name] = t;
    // TODO(wilber): get input info from mlir.
    // TODO(wilber): input dims, now only support static_shape, and just remove
    // the first dimension.
    // TODO(wilber): now only suppot float input.
    nvinfer1::Dims dims;
    dims.nbDims = t->dims().size() - 1;
    for (int i = 0; i < dims.nbDims; ++i) {
      dims.d[i] = t->dims()[i + 1];
    }
    auto* in =
        network->addInput(input_name.c_str(), nvinfer1::DataType::kFLOAT, dims);
    map_info[operand] = in;
  }

  // TODO(wilber): Find a way to add layer.
  for (auto& inner_op : block.without_terminator()) {
    if (inner_op.getName().getStringRef() == "trt.Activation") {
      trt::ActivationOp act_op = llvm::dyn_cast<trt::ActivationOp>(inner_op);
      auto in_arg = act_op.getOperand();
      if (!map_info.count(in_arg)) {
        CHECK(false) << "map_info not has in_arg.";
      }
      nvinfer1::ActivationType act_type =
          static_cast<nvinfer1::ActivationType>(act_op.activation_type());
      auto* act_layer = network->addActivation(*map_info[in_arg], act_type);
      act_layer->setAlpha(act_op.alpha().convertToFloat());
      act_layer->setBeta(act_op.beta().convertToFloat());
      for (size_t i = 0; i < act_op->getNumResults(); ++i) {
        nvinfer1::ITensor* act_out_tensor = act_layer->getOutput(i);
        mlir::Value act_out = act_op->getResult(i);
        map_info[act_out] = act_out_tensor;
      }
    }
    // if (inner_op.getName().getStringRef() == "trt.Constant") {
    //   trt::ConstantOp op = llvm::dyn_cast<trt::ConstantOp>(inner_op);
    //   mlir::Value op_out = op.getResult();
    //   std::vector<float> weight_data{1};
    //   auto* layer = network->addConstant(nvinfer1::Dims2(1, 1),
    //       nvinfer1::Weights{nvinfer1::DataType::kFLOAT, weight_data.data(), 1});
    //   auto* op_out_tenor = layer->getOutput(0);
    //   map_info[op_out] = op_out_tenor;
    // }
  }

  for (auto& inner_op : block.without_terminator()) {
    for (mlir::Value v : inner_op.getResults()) {
      for (mlir::Operation* user : v.getUsers()) {
        if (user->getName().getStringRef() == "infrt.return") {
          if (!map_info.count(v)) {
            CHECK(false) << "map_info not has value";
          }
          network->markOutput(*map_info[v]);
        }
      }
    }
  }

  // std::unordered_map<std::string, phi::DenseTensor*> trt_bind_outputs;
  mlir::Operation* ret = block.getTerminator();
  for (unsigned int i = 0; i < ret->getNumOperands(); ++i) {
    mlir::Value arg = ret->getOperand(i);
    CHECK(map_info.count(arg));
    map_info[arg]->setName(("output_" + std::to_string(i)).c_str());
  }
  for (int i = 0; i < network->getNbOutputs(); ++i) {
    engine.PrepareOutputHandle(network->getOutput(i)->getName());
  }

  VLOG(3) << "trt engine build start.";
  engine.Build(std::move(network), options);
  VLOG(3) << "trt engine build done.";

  // TODO(wilber): get inference options from mlir.
  backends::tensorrt::InferenceOptions inference_options;
  inference_options.batch = 1;
  // TODO(wilber): bind trt input/output tensors.
  engine.SetUpInference(inference_options, trt_bind_inputs);
  return engine;
}

void PrintTrtLayer(backends::tensorrt::TrtEngine* engine) {
  engine->GetEngineInfo();
}

std::vector<phi::DenseTensor*> TrtEngineCompute(
    backends::tensorrt::TrtEngine* engine, const phi::GPUContext& context) {
  engine->Run(context);
  std::vector<phi::DenseTensor*> res;
  for (size_t i = 0; i < engine->GetOutputNum(); ++i) {
    res.push_back(engine->GetOutput("output_" + std::to_string(i)));
  }
  return res;
}

}  // namespace tensorrt
}  // namespace kernel
}
// namespace infrt
paddle/infrt/kernel/tensorrt/trt_kernels.h
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <tuple>
#include <utility>
#include "mlir/IR/Operation.h"
#include "paddle/infrt/backends/tensorrt/trt_engine.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
namespace infrt {
namespace host_context {
class SymbolTable;
}  // namespace host_context

namespace kernel {
namespace tensorrt {

struct MlirOperationWithInfrtSymbol {
  mlir::Operation* operation;
  ::infrt::host_context::SymbolTable* symbol_table;
};

::infrt::backends::tensorrt::TrtEngine CreateTrtEngine(
    MlirOperationWithInfrtSymbol engine_op);

void PrintTrtLayer(backends::tensorrt::TrtEngine* engine);

std::vector<phi::DenseTensor*> TrtEngineCompute(
    backends::tensorrt::TrtEngine* engine, const phi::GPUContext& context);

}  // namespace tensorrt
}  // namespace kernel
}  // namespace infrt
paddle/infrt/tests/dialect/disabled_trt.mlir
0 → 100644
// RUN: infrtexec -i %s | FileCheck %s
// CHECK-LABEL: @run_trt
func @run_trt(%0 : !infrt.dense_tensor<GPU, FP32, NCHW>, %ctx : !phi.context<GPU>) {
%a = "trt.create_engine"(%0) ({
%1 = "trt.Activation"(%0) {activation_type = 1 : si32, alpha = 1.0 : f32, beta = 6.0 : f32} : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
"infrt.return"(%1) : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
}) : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> !trt.engine
"trt.inspect_engine"(%a) {} : (!trt.engine) -> ()
%res = "trt.compute"(%a, %ctx) {} : (!trt.engine, !phi.context<GPU>) -> (!infrt.tensor_list)
%size = "dt.tensor_list_get_size"(%res) {} : (!infrt.tensor_list) -> (i32)
"infrt.print.i32"(%size) {} : (i32) -> ()
%ts0 = "dt.tensor_list_get_tensor"(%res) {id = 0 : i32} : (!infrt.tensor_list) -> (!infrt.dense_tensor<GPU, FP32, NCHW>)
"phi_dt.print_tensor" (%ts0) : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
infrt.return
}
// CHECK-LABEL: @main
func @main() {
%ctx = "phi_dt.create_context.gpu" (): () -> !phi.context<GPU>
%t = "phi_dt.create_dense_tensor.gpu" (%ctx) {
precision=#infrt.precision<FP32>,
layout=#infrt.layout<NCHW>,
dims=[1:i64, 3:i64, 1:i64, 1:i64], lod=[1:i64]}: (!phi.context<GPU>) -> (!infrt.dense_tensor<GPU, FP32, NCHW>)
"phi_dt.fill_dense_tensor.f32"(%t) {value=[3.8:f32, 2.4:f32, 1.3:f32]} : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
"phi_dt.print_tensor" (%t) : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
//%res =
infrt.call @run_trt(%t, %ctx) : (!infrt.dense_tensor<GPU, FP32, NCHW>, !phi.context<GPU>) -> ()
//-> (!infrt.dense_tensor<GPU, FP32, NCHW>)
infrt.return
}
paddle/infrt/tests/dialect/rewrite.mlir → paddle/infrt/tests/dialect/pd/rewrite.mlir
-// RUN: infrtopt --canonicalize %s | FileCheck %s
+// RUN: infrtopt --pd-op-fuse %s | FileCheck %s
 // CHECK-LABEL: @main
 func @main() -> tensor<?xf32> {
   %a = "pd.feed"() {name="input0"} : () -> tensor<?xf32>
...
paddle/infrt/tests/dialect/phi/dense_tensor.mlir
@@ -3,7 +3,7 @@
 // CHECK-LABEL: @sign_any_float32_execute
 func @sign_any_float32_execute() {
   %ctx = "phi_dt.create_context.cpu" (): () -> !phi.context<CPU>
-  %t = "phi_dt.create_dense_tensor" (%ctx) {
+  %t = "phi_dt.create_dense_tensor.cpu" (%ctx) {
     precision=#infrt.precision<FP32>,
     layout=#infrt.layout<NCHW>, lod=[1:i64], dims=[1:i64]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
   "phi_dt.fill_dense_tensor.f32"(%t) {value=[3.8:f32]} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
...
paddle/infrt/tests/dialect/phi/phi_test.mlir
@@ -6,7 +6,7 @@ module {
 }
 func @main() {
   %ctx = "phi_dt.create_context.cpu" (): () -> !phi.context<CPU>
-  %t = "phi_dt.create_dense_tensor" (%ctx) {precision=#infrt.precision<FP32>, layout=#infrt.layout<NCHW>, lod=[1:i64], dims=[1:i64]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
+  %t = "phi_dt.create_dense_tensor.cpu" (%ctx) {precision=#infrt.precision<FP32>, layout=#infrt.layout<NCHW>, lod=[1:i64], dims=[1:i64]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
   "phi_dt.fill_dense_tensor.f32"(%t) {value=[3.8:f32]} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
   %2 = infrt.call@predict(%t) : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
   phi_dt.print_tensor(%2 : !infrt.dense_tensor<CPU, FP32, NCHW>)
...
paddle/infrt/tests/dialect/trt_ops.mlir
 // RUN: trt-exec %s
 // CHECK-LABEL: @main
-func @main(%bias:tensor<?xf32>, %c:tensor<?xf32>, %b1:tensor<?xf32>, %b2:tensor<?xf32>, %bias1:tensor<?xf32>, %bias2:tensor<?xf32>) -> tensor<?xf32> {
+func @main(%bias:!infrt.dense_tensor<GPU, FP32, NCHW>, %c:!infrt.dense_tensor<GPU, FP32, NCHW>, %b1:!infrt.dense_tensor<GPU, FP32, NCHW>, %b2:!infrt.dense_tensor<GPU, FP32, NCHW>, %bias1:!infrt.dense_tensor<GPU, FP32, NCHW>, %bias2:!infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW> {
-  %d = "pd.elementwise_add"(%c, %bias) {axis=-1:si32} : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
+  %d = "pd.elementwise_add"(%c, %bias) {axis=-1:si32} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
-  %e = "pd.relu6"(%d) {} : (tensor<?xf32>) -> tensor<?xf32>
+  %e = "pd.relu6"(%d) {} : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
-  %c1 = "pd.matmul"(%e, %b1) {transpose_x=false, transpose_y=false} : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
+  %c1 = "pd.matmul"(%e, %b1) {transpose_x=false, transpose_y=false} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
-  %d1 = "pd.elementwise_add"(%c1, %bias1) {axis=-1:si32} : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
+  %d1 = "pd.elementwise_add"(%c1, %bias1) {axis=-1:si32} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
-  %e1 = "pd.relu"(%d1) {} : (tensor<?xf32>) -> tensor<?xf32>
+  %e1 = "pd.relu"(%d1) {} : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
-  %c2 = "pd.matmul"(%e1, %b2) {transpose_x=true, transpose_y=false} : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
+  %c2 = "pd.matmul"(%e1, %b2) {transpose_x=true, transpose_y=false} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
-  %d2 = "pd.elementwise_add"(%c2, %bias2) {axis=-1:si32} : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
+  %d2 = "pd.elementwise_add"(%c2, %bias2) {axis=-1:si32} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
-  %e2 = "pd.relu"(%d2) {} : (tensor<?xf32>) -> tensor<?xf32>
+  %e2 = "pd.relu"(%d2) {} : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
-  infrt.return %e2 : tensor<?xf32>
+  infrt.return %e2 : !infrt.dense_tensor<GPU, FP32, NCHW>
 }
paddle/phi/api/include/tensor.h
@@ -324,7 +324,7 @@ class PADDLE_API Tensor final {
    *
    * @return std::shared_ptr<phi::TensorBase>
    */
-  std::shared_ptr<phi::TensorBase> impl() const;
+  const std::shared_ptr<phi::TensorBase>& impl() const;
 
   /**
    * @brief Set the implemention of current Tensor.
@@ -333,6 +333,13 @@ class PADDLE_API Tensor final {
    */
   void set_impl(const std::shared_ptr<phi::TensorBase>& impl);
 
+  /**
+   * @brief Set the implemention of current Tensor.
+   *
+   * @param impl
+   */
+  void set_impl(std::shared_ptr<phi::TensorBase>&& impl);
+
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   /**
    * @brief Get the stream where the tensor is currently located
@@ -397,7 +404,9 @@ class PADDLE_API Tensor final {
    * @param blocking, Should we copy this in sync way.
    * @return void
    */
-  void copy_(const Tensor& src, const bool blocking);
+  void copy_(const Tensor& src,
+             const phi::Place& target_place,
+             const bool blocking);
 
   /**
    * @brief Cast datatype from one to another
    *
...
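A minimal usage sketch of the two signatures this hunk introduces (the rvalue set_impl overload and the place-aware copy_). Everything here is illustrative: the function name and tensor values are invented for the example and are not part of the commit.

#include <memory>
#include "paddle/phi/api/include/tensor.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/dense_tensor.h"

void CopyToGpuExample(const paddle::experimental::Tensor& src) {
  paddle::experimental::Tensor dst;
  // A temporary shared_ptr is an rvalue, so this binds the new && overload
  // and moves the implementation in without an extra reference bump.
  dst.set_impl(std::make_shared<phi::DenseTensor>());
  // The destination place is now passed explicitly instead of being implied.
  dst.copy_(src, phi::GPUPlace(0), /*blocking=*/true);
}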
paddle/phi/api/lib/CMakeLists.txt
@@ -148,4 +148,4 @@ cc_library(phi_bw_function_api SRCS ${bw_api_source_file} DEPS phi_tensor_raw ph
 cc_library(sparse_api SRCS ${sparse_api_source_file} DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils sparse_api_custom_impl)
 cc_library(sparse_bw_api SRCS ${sparse_bw_api_source_file} DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils sparse_api sparse_api_custom_impl)
-cc_library(phi_tensor SRCS tensor_method.cc DEPS phi_tensor_raw phi_function_api)
+cc_library(phi_tensor SRCS tensor_method.cc DEPS phi_tensor_raw phi_function_api api_gen_utils kernel_dispatch infermeta)
paddle/phi/api/lib/api_gen_utils.cc
@@ -95,12 +95,8 @@ paddle::optional<phi::MetaTensor> MakeMetaTensor(
 /* ------------------ for output ----------------------- */
 
 phi::DenseTensor* SetKernelOutput(Backend backend, Tensor* out) {
-  if (!out->initialized()) {
-    auto dense_tensor = std::make_shared<phi::DenseTensor>(
-        phi::make_intrusive<SharedStorage>(phi::TransToPhiPlace(backend)),
-        phi::DenseTensorMeta());
-    out->set_impl(dense_tensor);
-    return dense_tensor.get();
+  if (out->impl() == nullptr) {
+    out->set_impl(std::make_shared<phi::DenseTensor>());
   }
   return static_cast<phi::DenseTensor*>(out->impl().get());
 }
@@ -111,9 +107,7 @@ std::vector<phi::DenseTensor*> SetKernelOutput(size_t out_size,
   out->reserve(out_size);
   std::vector<phi::DenseTensor*> results(out_size);
   for (size_t i = 0; i < out_size; ++i) {
-    auto tensor_ptr = std::make_shared<phi::DenseTensor>(
-        phi::make_intrusive<SharedStorage>(phi::TransToPhiPlace(backend)),
-        phi::DenseTensorMeta());
+    auto tensor_ptr = std::make_shared<phi::DenseTensor>();
     results[i] = tensor_ptr.get();
     out->emplace_back();
     out->back().set_impl(tensor_ptr);
...
paddle/phi/api/lib/data_transform.cc
@@ -167,10 +167,7 @@ phi::DenseTensor TransformData(const phi::DenseTensor& tensor,
   if (NeedTransformPlace(
           out.place(), target_args_def.backend, transform_flag)) {
-    phi::DenseTensor result(
-        phi::make_intrusive<paddle::experimental::SharedStorage>(
-            phi::TransToPhiPlace(target_args_def.backend)),
-        {out.dtype(), out.dims(), out.layout()});
+    phi::DenseTensor result;
     framework::TransDataDevice(
         out, phi::TransToPhiPlace(target_args_def.backend), &result);
     out = result;
@@ -190,14 +187,14 @@ std::shared_ptr<phi::DenseTensor> PrepareData(
           tensor_in->dtype(), target_args_def.dtype, transform_flag) &&
       !NeedTransformLayout(
           tensor_in->layout(), target_args_def.layout, transform_flag))) {
-    return std::dynamic_pointer_cast<phi::DenseTensor>(tensor_in);
+    return std::static_pointer_cast<phi::DenseTensor>(tensor_in);
   }
   phi::DenseTensor out =
       TransformData(*(static_cast<phi::DenseTensor*>(tensor_in.get())),
                     target_args_def,
                     transform_flag);
-  return std::make_shared<phi::DenseTensor>(out);
+  return std::make_shared<phi::DenseTensor>(std::move(out));
 }
 
 std::shared_ptr<phi::DenseTensor> PrepareData(
...
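As a standalone illustration of why the cast above could be relaxed: when the dynamic type of the pointee is already guaranteed, std::static_pointer_cast skips the runtime check that std::dynamic_pointer_cast performs, and moving a freshly built object into make_shared avoids one copy. The Base/Derived names below are hypothetical and unrelated to Paddle types.

#include <memory>
#include <utility>

struct Base { virtual ~Base() = default; };
struct Derived : Base { int payload = 0; };

// The caller guarantees that p really holds a Derived, so no dynamic check is needed.
std::shared_ptr<Derived> AsDerived(const std::shared_ptr<Base>& p) {
  return std::static_pointer_cast<Derived>(p);
}

std::shared_ptr<Derived> WrapByMove() {
  Derived d;
  d.payload = 42;
  // std::move hands the object to make_shared instead of copying it.
  return std::make_shared<Derived>(std::move(d));
}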
paddle/phi/api/lib/tensor.cc
@@ -46,6 +46,7 @@ limitations under the License. */
  * In the future, the necessary components will be moved to the this library,
  * or the corresponding components will be re-implemented.
  */
 #include "paddle/fluid/memory/memory.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/platform/stream/cuda_stream.h"
@@ -142,7 +143,12 @@ PlaceType Tensor::place() const {
 }
 
 paddle::platform::Place Tensor::inner_place() const {
-  return ConvertExtPlaceToInnerPlace(place());
+  PADDLE_ENFORCE_NOT_NULL(
+      impl_,
+      phi::errors::PermissionDenied(
+          "Null pointer error, the impl_ of Tensor should not be "
+          "Null when calling Tensor::inner_place()."));
+  return impl_->place();
 }
 
 bool Tensor::is_cpu() const {
@@ -286,12 +292,16 @@ Tensor Tensor::slice(int64_t begin_idx, int64_t end_idx) const {
   }
 }
 
-std::shared_ptr<phi::TensorBase> Tensor::impl() const { return impl_; }
+const std::shared_ptr<phi::TensorBase> &Tensor::impl() const { return impl_; }
 
 void Tensor::set_impl(const std::shared_ptr<phi::TensorBase> &impl) {
   impl_ = impl;
 }
 
+void Tensor::set_impl(std::shared_ptr<phi::TensorBase> &&impl) {
+  impl_ = std::move(impl);
+}
+
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 gpuStream_t Tensor::stream() const {
   return platform::stream::get_current_stream(-1)->raw_stream();
...
paddle/phi/api/lib/tensor_method.cc
@@ -19,9 +19,12 @@ limitations under the License. */
 #include "paddle/phi/core/compat/convert_utils.h"
 #include "paddle/phi/core/tensor_base.h"
 
+#include "paddle/phi/api/lib/api_gen_utils.h"
+#include "paddle/phi/api/lib/kernel_dispatch.h"
+#include "paddle/phi/infermeta/unary.h"
+
 namespace paddle {
 namespace experimental {
 // declare cast api
 Tensor cast(const Tensor &x, DataType out_dtype);
 Tensor copy_to(const Tensor &x, Backend backend, bool blocking);
@@ -67,12 +70,18 @@ template PADDLE_API Tensor Tensor::copy_to<phi::dtype::complex<double>>(
 template PADDLE_API Tensor
 Tensor::copy_to<phi::dtype::float16>(const PlaceType &target_place) const;
 
-void Tensor::copy_(const Tensor &src, bool blocking) {
+void Tensor::copy_(const Tensor &src,
+                   const phi::Place &target_place,
+                   bool blocking) {
   if (!src.is_initialized()) {
+    VLOG(8) << "Src is empty, skip copy";
     return;
   }
+  // Prepare copy kernel key and outputs
+  auto kernel_key_set = ParseKernelKeyByInputArgs(src);
+  KernelType kernel_type = ParseKernelTypeByInputArgs(src);
   VLOG(3) << "Deep copy Tensor from " << src.name() << " to " << name();
-  if (defined()) {
+  if (is_initialized()) {
     PADDLE_ENFORCE_EQ(dtype(),
                       src.dtype(),
                       platform::errors::PreconditionNotMet(
@@ -87,10 +96,91 @@ void Tensor::copy_(const Tensor &src, bool blocking) {
                           "Copy cannot be performed!",
                           name(),
                           src.name()));
-  }
-  auto copy_tensor =
-      src.copy_to(phi::TransToPhiBackend(src.inner_place()), blocking);
-  set_impl(copy_tensor.impl());
-}
+    PADDLE_ENFORCE_EQ(target_place,
+                      inner_place(),
+                      platform::errors::PreconditionNotMet(
+                          "Place is different of dst tensor and args %s, which "
+                          "current tensor holds %s "
+                          "Copy cannot be performed!",
+                          target_place.DebugString(),
+                          inner_place().DebugString()));
+    kernel_key_set.backend_set =
+        kernel_key_set.backend_set |
+        BackendSet(phi::TransToPhiBackend(inner_place()));
+  } else {
+    // Deep Copy AutoGrad info from src to self.
+    *autograd_meta_ = *(src.autograd_meta_);
+  }
+
+  auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
+  auto *dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
+
+  Backend kernel_backend = Backend::UNDEFINED;
+  DataLayout kernel_layout = DataLayout::UNDEFINED;
+  DataType kernel_data_type = DataType::UNDEFINED;
+
+  if (kernel_backend == Backend::UNDEFINED ||
+      kernel_layout == DataLayout::UNDEFINED ||
+      kernel_data_type == DataType::UNDEFINED) {
+    if (kernel_backend == Backend::UNDEFINED) {
+      kernel_backend = kernel_key.backend();
+    }
+    if (kernel_layout == DataLayout::UNDEFINED) {
+      kernel_layout = kernel_key.layout();
+    }
+    if (kernel_data_type == DataType::UNDEFINED) {
+      kernel_data_type = kernel_key.dtype();
+    }
+  }
+
+  if (kernel_type == KernelType::DENSE_TENSOR_KENREL) {
+    auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+        "copy", {kernel_backend, kernel_layout, kernel_data_type});
+    VLOG(6) << "copy API kernel key: " << kernel_key;
+    VLOG(6) << "copy API kernel: " << kernel;
+    using kernel_signature = void (*)(const platform::DeviceContext &,
+                                      const phi::DenseTensor &,
+                                      phi::Place,
+                                      bool,
+                                      phi::DenseTensor *);
+    SetKernelOutput(kernel_backend, this);
+    phi::MetaTensor meta_out(impl_.get());
+    phi::UnchangedInferMeta(
+        MakeMetaTensor(
+            *(std::static_pointer_cast<phi::DenseTensor>(src.impl_))),
+        &meta_out);
+    auto *kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
+    (*kernel_fn)(*dev_ctx,
+                 (*(std::static_pointer_cast<phi::DenseTensor>(src.impl_))),
+                 target_place,
+                 blocking,
+                 static_cast<phi::DenseTensor *>(impl_.get()));
+  } else if (kernel_type == KernelType::SELECTED_ROWS_KENREL) {
+    auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+        "copy_sr", {kernel_backend, kernel_layout, kernel_data_type});
+    VLOG(6) << "copy API kernel key: " << kernel_key;
+    VLOG(6) << "copy API kernel: " << kernel;
+    using kernel_signature = void (*)(const platform::DeviceContext &,
+                                      const phi::SelectedRows &,
+                                      phi::Place,
+                                      bool,
+                                      phi::SelectedRows *);
+    SetSelectedRowsKernelOutput(kernel_backend, this);
+    phi::MetaTensor meta_out(impl_.get());
+    phi::UnchangedInferMeta(
+        MakeMetaTensor(
+            *(std::static_pointer_cast<phi::SelectedRows>(src.impl_))),
+        &meta_out);
+    auto *kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
+    (*kernel_fn)(*dev_ctx,
+                 (*(std::static_pointer_cast<phi::SelectedRows>(src.impl_))),
+                 target_place,
+                 blocking,
+                 static_cast<phi::SelectedRows *>(impl_.get()));
+  } else {
+    PADDLE_THROW(paddle::platform::errors::InvalidArgument(
+        "We currently only support dense tensor copy for now and if u need to "
+        "copy selected rows please raise a issue."));
+  }
+}
 
 }  // namespace experimental
...
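A small behavioural sketch of the branches added above, with assumed tensor arguments (nothing here is part of the commit): if the destination is already initialized, the requested target place must match the place it already holds, otherwise the PreconditionNotMet error fires; an uninitialized destination first deep-copies the autograd meta from the source and then dispatches to the "copy" (dense) or "copy_sr" (selected rows) kernel.

#include "paddle/phi/api/include/tensor.h"
#include "paddle/phi/common/place.h"

void CopyBranchesExample(const paddle::experimental::Tensor& src,
                         paddle::experimental::Tensor* initialized_dst,
                         paddle::experimental::Tensor* empty_dst) {
  // Destination already holds data: the target place must equal its own place.
  initialized_dst->copy_(src, initialized_dst->inner_place(), /*blocking=*/true);
  // Destination is empty: autograd meta is copied from src, then the kernel runs.
  empty_dst->copy_(src, phi::CPUPlace(), /*blocking=*/true);
}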
paddle/phi/backends/gpu/gpu_context.cc
@@ -741,6 +741,10 @@ struct GPUContext::Impl {
 GPUContext::GPUContext() : DeviceContext(), impl_(std::make_unique<Impl>()) {}
 
+GPUContext::GPUContext(GPUContext&&) = default;
+
+GPUContext& GPUContext::operator=(GPUContext&&) = default;
+
 GPUContext::GPUContext(const GPUPlace& place)
     : DeviceContext(), impl_(std::make_unique<Impl>(place)) {}
...
paddle/phi/backends/gpu/gpu_context.h
@@ -77,6 +77,8 @@ class DnnWorkspaceHandle {
 class GPUContext : public DeviceContext {
  public:
   GPUContext();
+  GPUContext(GPUContext&&);
+  GPUContext& operator=(GPUContext&&);
 
   explicit GPUContext(const GPUPlace& place);
...
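A minimal sketch (assuming a CUDA/HIP build of Paddle; the function names are invented for illustration) of what the newly declared move operations allow: a GPUContext can now be returned from a factory or re-seated by value without copying.

#include <utility>
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/place.h"

phi::GPUContext MakeContext() {
  phi::GPUContext ctx(phi::GPUPlace(0));
  return ctx;  // uses GPUContext(GPUContext&&)
}

void Reseat(phi::GPUContext* slot) {
  *slot = MakeContext();  // uses GPUContext& operator=(GPUContext&&)
}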
paddle/phi/common/CMakeLists.txt
 cc_library(phi_place SRCS place.cc)
-cc_library(scalar SRCS scalar.cc)
+cc_library(scalar SRCS scalar.cc DEPS phi_enforce)
paddle/phi/core/kernel_factory.h
@@ -197,8 +197,16 @@ class Kernel {
   const KernelArgsDef& args_def() const { return args_def_; }
 
   const TensorArgDef& InputAt(size_t idx) const {
     return args_def_.input_defs().at(idx);
   }
 
+  TensorArgDef& InputAt(size_t idx) { return args_def_.input_defs().at(idx); }
+
   const TensorArgDef& OutputAt(size_t idx) const {
     return args_def_.output_defs().at(idx);
   }
 
+  TensorArgDef& OutputAt(size_t idx) { return args_def_.output_defs().at(idx); }
+
   bool IsValid() { return fn_ != nullptr; }
...
paddle/phi/infermeta/binary.cc
@@ -571,6 +571,48 @@ void GatherTreeMeta(const MetaTensor& ids,
   out->set_dims(ids_dims);
 }
 
+void GridSampleBaseInferMeta(const MetaTensor& x,
+                             const MetaTensor& grid,
+                             MetaTensor* out,
+                             MetaConfig config) {
+  auto x_dims = x.dims();
+  auto grid_dims = grid.dims();
+  PADDLE_ENFORCE_EQ(x_dims.size(),
+                    4,
+                    phi::errors::InvalidArgument(
+                        "Input(X) of GridSampleOp should be 4-D Tensor, but "
+                        "received X dimension size(%d)",
+                        x_dims.size()));
+  PADDLE_ENFORCE_EQ(grid_dims.size(),
+                    4,
+                    phi::errors::InvalidArgument(
+                        "Input(Grid) of GridSampleOp should be 4-D Tensor, "
+                        "but received X dimension size(%d)",
+                        grid_dims.size()));
+  if (config.is_runtime || grid_dims[3] > 0) {
+    PADDLE_ENFORCE_EQ(
+        grid_dims[3],
+        2,
+        phi::errors::InvalidArgument(
+            "Input(Grid) dimension[3] should be 2, but received %d",
+            grid_dims[3]));
+  }
+  if (config.is_runtime) {
+    PADDLE_ENFORCE_EQ(
+        grid_dims[0],
+        x_dims[0],
+        phi::errors::InvalidArgument(
+            "Input(X) and Input(Grid) dimension[0] should be equal, but "
+            "received X dimension[0](%d) != Grid dimension[0](%d)",
+            x_dims[0],
+            grid_dims[0]));
+  }
+
+  out->set_dims({x_dims[0], x_dims[1], grid_dims[1], grid_dims[2]});
+  out->set_dtype(x.dtype());
+  out->share_lod(x);
+}
+
 void HuberLossInferMeta(const MetaTensor& input,
                         const MetaTensor& label,
                         float delta,
...
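To make the shape rule in GridSampleBaseInferMeta concrete, here is a tiny framework-free helper (hypothetical, for illustration only) that applies the same checks and produces the same output dims: x is [N, C, H_in, W_in], grid is [N, H_out, W_out, 2], and the result is [N, C, H_out, W_out].

#include <array>
#include <cassert>
#include <cstdint>

std::array<int64_t, 4> GridSampleOutDims(const std::array<int64_t, 4>& x,
                                         const std::array<int64_t, 4>& grid) {
  assert(grid[3] == 2);     // the last grid dim must hold (x, y) coordinates
  assert(x[0] == grid[0]);  // batch sizes must match
  return {x[0], x[1], grid[1], grid[2]};  // {N, C, H_out, W_out}
}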
paddle/phi/infermeta/binary.h
@@ -103,6 +103,11 @@ void GatherTreeMeta(const MetaTensor& ids,
                     const MetaTensor& parents,
                     MetaTensor* out);
 
+void GridSampleBaseInferMeta(const MetaTensor& x,
+                             const MetaTensor& grid,
+                             MetaTensor* out,
+                             MetaConfig config = MetaConfig());
+
 void HuberLossInferMeta(const MetaTensor& input_meta,
                         const MetaTensor& label_meta,
                         float delta,
...
paddle/phi/kernels/selected_rows/copy_kernel.cc
0 → 100644
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/selected_rows/copy_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/copy_kernel.h"
namespace phi {
namespace sr {

template <typename Context>
void Copy(const Context& dev_ctx,
          const SelectedRows& src,
          Place dst_place,
          bool blocking,
          SelectedRows* dst) {
  if (src.value().Holder() != dst->value().Holder() ||
      src.value().data() != dst->value().data()) {
    dst->set_rows(src.rows());
    dst->set_height(src.height());
  }
  phi::Copy<Context>(
      dev_ctx, src.value(), dst_place, blocking, dst->mutable_value());
}

}  // namespace sr
}  // namespace phi

PD_REGISTER_GENERAL_KERNEL(
    copy_sr, CPU, ALL_LAYOUT, phi::sr::Copy<phi::CPUContext>, ALL_DTYPE) {}

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_GENERAL_KERNEL(
    copy_sr, GPU, ALL_LAYOUT, phi::sr::Copy<phi::GPUContext>, ALL_DTYPE) {}
#endif
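A short sketch of calling the function behind the copy_sr registration directly on CPU; the context and the source/destination SelectedRows are assumed to be prepared by the caller, and the example itself is not part of the commit.

#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/selected_rows.h"
#include "paddle/phi/kernels/selected_rows/copy_kernel.h"

void CopySelectedRowsExample(const phi::CPUContext& ctx,
                             const phi::SelectedRows& src,
                             phi::SelectedRows* dst) {
  // Rows and height are taken from src, then the value tensor is copied.
  phi::sr::Copy<phi::CPUContext>(ctx, src, phi::CPUPlace(),
                                 /*blocking=*/true, dst);
}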
paddle/phi/kernels/selected_rows/copy_kernel.h
0 → 100644
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/selected_rows.h"
#include "paddle/phi/core/sparse_csr_tensor.h"
namespace phi {
namespace sr {

template <typename Context>
void Copy(const Context& dev_ctx,
          const SelectedRows& src,
          Place dst_place,
          bool blocking,
          SelectedRows* dst);

}  // namespace sr
}  // namespace phi
python/paddle/fluid/dygraph/base.py
@@ -565,16 +565,25 @@ def grad(outputs,
     if isinstance(in_out_list, (list, tuple)):
         assert len(in_out_list) > 0, "{} cannot be empty".format(name)
         for each_var in in_out_list:
-            assert isinstance(
-                each_var,
-                core.VarBase), "Elements of {} must be Variable".format(name)
+            if core._in_eager_mode():
+                assert isinstance(
+                    each_var, core.eager.
+                    Tensor), "Elements of {} must be Tensor".format(name)
+            else:
+                assert isinstance(
+                    each_var,
+                    core.VarBase), "Elements of {} must be Variable".format(name)
         return in_out_list
     else:
-        assert isinstance(
-            in_out_list,
-            core.VarBase), "{} must be Variable or list of Variable".format(name)
+        if core._in_eager_mode():
+            assert isinstance(
+                in_out_list, core.eager.
+                Tensor), "{} must be Tensor or list of Tensor".format(name)
+        else:
+            assert isinstance(
+                in_out_list,
+                core.VarBase), "{} must be Variable or list of Variable".format(name)
         return [in_out_list]
 
     outputs = check_in_out(outputs, 'outputs')
@@ -586,9 +595,14 @@ def grad(outputs,
         for each_var in grad_outputs:
             if each_var is not None:
-                assert isinstance(
-                    each_var, core.VarBase
-                ), "grad_outputs must be None, a Variable or a list containing None or Variables"
+                if core._in_eager_mode():
+                    assert isinstance(
+                        each_var, core.eager.Tensor
+                    ), "grad_outputs must be None, a Variable or a list containing None or Variables"
+                else:
+                    assert isinstance(
+                        each_var, core.VarBase
+                    ), "grad_outputs must be None, a Variable or a list containing None or Variables"
             else:
                 grad_outputs = []
@@ -600,14 +614,27 @@ def grad(outputs,
         no_grad_vars = []
     elif isinstance(no_grad_vars, core.VarBase):
         no_grad_vars = [no_grad_vars]
+    elif isinstance(no_grad_vars, core.eager.Tensor):
+        no_grad_vars = [no_grad_vars]
     elif isinstance(no_grad_vars, (list, tuple, set)):
         no_grad_vars = list(no_grad_vars)
         for var in no_grad_vars:
-            assert isinstance(
-                var, core.VarBase), "no_grad_vars can only contains Variable"
+            if core._in_eager_mode():
+                assert isinstance(
+                    var, core.eager.
+                    Tensor), "no_grad_vars can only contains Tensor"
+            else:
+                assert isinstance(
+                    var, core.VarBase), "no_grad_vars can only contains Variable"
     else:
-        raise AssertionError(
-            "no_grad_vars must be None, Variable or list/tuple/set of Variables")
+        if core._in_eager_mode():
+            raise AssertionError(
+                "no_grad_vars must be None, Tensor or list/tuple/set of Tensors")
+        else:
+            raise AssertionError(
+                "no_grad_vars must be None, Variable or list/tuple/set of Variables")
 
     assert isinstance(create_graph, bool), "create_graph must be True or False"
@@ -622,6 +649,11 @@ def grad(outputs,
     assert isinstance(only_inputs, bool), "only_inputs must be True or False"
     assert only_inputs, "only_inputs=False is not supported yet"
 
+    if core._in_eager_mode():
+        return core.eager.run_partial_grad(outputs, inputs, grad_outputs,
+                                           retain_graph, create_graph,
+                                           only_inputs, allow_unused,
+                                           no_grad_vars)
+
     place = core.Place()
     place.set_place(framework._current_expected_place())
     return core.dygraph_partial_grad(inputs, outputs, grad_outputs,
...
python/paddle/fluid/dygraph/io.py
@@ -30,6 +30,7 @@ from paddle.fluid.layers import nn
 from paddle.fluid.layers.utils import _hash_with_id
 from paddle.fluid.dygraph.base import switch_to_static_graph
 from paddle.fluid.framework import in_dygraph_mode
+from paddle import _C_ops
 
 __all__ = ['TranslatedLayer']
@@ -761,6 +762,21 @@ def _construct_params_and_buffers(model_path,
     return var_dict
 
+def _valid_vars(vars):
+    if vars:
+        return vars
+    if framework._in_eager_mode():
+        return [
+            core.eager.Tensor(core.VarDesc.VarType.FP32, [], "Fake_var",
+                              core.VarDesc.VarType.RAW, False)
+        ]
+    else:
+        return [
+            core.VarBase(core.VarDesc.VarType.FP32, [], "Fake_var",
+                         core.VarDesc.VarType.RAW, False)
+        ]
+
 def _run_dygraph(instance, input, program_holder):
 
     # 1. prepare inputs, outputs, attrs
@@ -826,17 +842,12 @@ def _run_dygraph(instance, input, program_holder):
     # hold forward variables
     if framework._in_eager_mode():
-        tmp_scope_vec = core.eager.Tensor(
-            dtype=core.VarDesc.VarType.FP32,
-            dims=[],
-            name="program_out_scope",
-            type=core.VarDesc.VarType.STEP_SCOPES,
-            persistable=True)
+        tmp_scope_vec = [program_holder.scope]
     else:
         tmp_scope_vec = core.VarBase(core.VarDesc.VarType.FP32, [],
                                      "program_out_scope",
                                      core.VarDesc.VarType.STEP_SCOPES, True)
-    tmp_scope_vec.value().set_scope(program_holder.scope)
+        tmp_scope_vec.value().set_scope(program_holder.scope)
 
     double_grad_vars = []
     for var_desc in program_holder.double_grad_descs:
@@ -852,41 +863,18 @@ def _run_dygraph(instance, input, program_holder):
                            var_desc.shape(),
                            var_desc.name(), var_desc.type(), False)
         double_grad_vars.append(var)
-    if len(double_grad_vars) == 0:
-        if framework._in_eager_mode():
-            double_grad_vars = [
-                core.eager.Tensor(value=[1],
-                                  name='Fake_var',
-                                  place=framework._current_expected_place())
-            ]
-        else:
-            double_grad_vars = [
-                core.VarBase(value=[1],
-                             name='Fake_var',
-                             place=framework._current_expected_place())
-            ]
 
     # 2. run program by op
     trace_program = program_holder.infer_program if instance._is_test else program_holder.train_program
     end_op_index = program_holder.infer_program.block(0).op_size()
-    framework._dygraph_tracer().trace_op(
-        type='run_program',
-        inputs={
-            'X': input_vars,
-            'Params': persistable_vars
-        },
-        outputs={
-            'Out': output_vars,
-            'OutScope': tmp_scope_vec,
-            'DOut': double_grad_vars
-        },
-        attrs={
-            'global_block': trace_program.block(0),
-            'start_op_index': 0,
-            'end_op_index': end_op_index,
-            'is_test': instance._is_test,
-            'program_id': _hash_with_id(trace_program, instance)
-        })
+    attrs = ('global_block', trace_program.block(0), 'start_op_index', 0,
+             'end_op_index', end_op_index, 'is_test', instance._is_test,
+             'program_id', _hash_with_id(trace_program, instance))
+    _C_ops.run_program(
+        _valid_vars(input_vars),
+        _valid_vars(persistable_vars),
+        _valid_vars(output_vars),
+        tmp_scope_vec,
+        _valid_vars(double_grad_vars), *attrs)
 
     # NOTE: [ why need set param's gradient type here ]
     # if user set sparse gradient mode, the param's gradient
     # will be SelectedRows, not LoDTensor. But tracer will just
@@ -914,8 +902,10 @@ def _run_dygraph(instance, input, program_holder):
 def drop_scope_if_no_grad(instance, scope_vec):
     tracer = framework._dygraph_tracer()
+    scope = scope_vec.value().get_scope() if isinstance(
+        scope_vec, (core.VarBase)) else scope_vec[0]
     if (not instance._is_test) and (not tracer._has_grad):
-        scope_vec.value().get_scope().drop_kids()
+        scope.drop_kids()
 
 def _run_static_graph(input, program_holder, trace_program):
...
python/paddle/fluid/dygraph/jit.py
@@ -821,7 +821,7 @@ def save(layer, path, input_spec=None, **configs):
         for var in flatten(input_spec):
             if isinstance(var, paddle.static.InputSpec):
                 inner_input_spec.append(var)
-            elif isinstance(var, (core.VarBase, Variable)):
+            elif isinstance(var, (core.VarBase, core.eager.Tensor, Variable)):
                 inner_input_spec.append(
                     paddle.static.InputSpec.from_tensor(var))
             else:
...
python/paddle/fluid/dygraph/layers.py
@@ -760,7 +760,8 @@ class Layer(object):
             raise KeyError("The name of buffer can not be empty.")
         elif hasattr(self, name) and name not in self._buffers:
             raise KeyError("attribute '{}' already exists.".format(name))
-        elif tensor is not None and not type(tensor) == core.VarBase:
+        elif tensor is not None and not (type(tensor) == core.VarBase or
+                                         type(tensor) == core.eager.Tensor):
             raise TypeError(
                 "The registered buffer should be a core.VarBase, but received {}.".
                 format(type(tensor).__name__))
...
python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -1118,9 +1118,9 @@ set_tests_properties(test_cumprod_op PROPERTIES TIMEOUT 120)
 set_tests_properties(test_split_program PROPERTIES TIMEOUT 120)
 if(WITH_DISTRIBUTE AND WITH_GPU AND WITH_NCCL)
     set_tests_properties(test_parallel_dygraph_dataparallel PROPERTIES TIMEOUT 120)
-    set_tests_properties(test_parallel_dygraph_unused_variables PROPERTIES TIMEOUT 120)
+    set_tests_properties(test_parallel_dygraph_unused_variables PROPERTIES TIMEOUT 150)
     set_tests_properties(test_parallel_dygraph_control_flow PROPERTIES TIMEOUT 200)
-    set_tests_properties(test_parallel_dygraph_no_sync PROPERTIES TIMEOUT 120)
+    set_tests_properties(test_parallel_dygraph_no_sync PROPERTIES TIMEOUT 150)
     set_tests_properties(test_parallel_dygraph_no_sync_gradient_check PROPERTIES TIMEOUT 30)
     set_tests_properties(test_parallel_dygraph_pipeline_parallel PROPERTIES TIMEOUT 200)
     set_tests_properties(test_parallel_dygraph_tensor_parallel PROPERTIES TIMEOUT 200)
...
python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py
@@ -27,6 +27,7 @@ from paddle.fluid.dygraph.nn import Conv2D, Linear, Pool2D
 from paddle.fluid.optimizer import AdamOptimizer
 from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
 from paddle.fluid.dygraph.dygraph_to_static import ProgramTranslator
+from paddle.fluid.framework import _test_eager_guard
 
 from predictor_utils import PredictorTools
@@ -155,6 +156,13 @@ class TestMNISTWithToStatic(TestMNIST):
                 np.allclose(dygraph_loss, static_loss),
                 msg='dygraph is {}\n static_res is \n{}'.format(dygraph_loss,
                                                                 static_loss))
+        with _test_eager_guard():
+            dygraph_loss = self.train_dygraph()
+            static_loss = self.train_static()
+            self.assertTrue(
+                np.allclose(dygraph_loss, static_loss),
+                msg='dygraph is {}\n static_res is \n{}'.format(dygraph_loss,
+                                                                static_loss))
 
     def test_mnist_declarative_cpu_vs_mkldnn(self):
         dygraph_loss_cpu = self.train_dygraph()
...
python/paddle/fluid/tests/unittests/parallel_dygraph_dataparallel_in_eager_mode.py
@@ -19,6 +19,7 @@ import unittest
 import os
 import numpy as np
 import random
+import socket
 import paddle
 import paddle.nn as nn
@@ -31,13 +32,26 @@ from paddle.optimizer import SGD
 from paddle.fluid.initializer import NumpyArrayInitializer
 
+def net_is_used(port, ip='127.0.0.1'):
+    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    try:
+        s.connect((ip, port))
+        s.shutdown(2)
+        return True
+    except Exception as e:
+        return False
+
 def init_process_group(strategy=None):
     nranks = ParallelEnv().nranks
     rank = ParallelEnv().local_rank
     is_master = True if rank == 0 else False
-    store = paddle.fluid.core.TCPStore("127.0.0.1", 6172, is_master, nranks)
-    group = core.ProcessGroupNCCL(store, rank, nranks)
-    return group
+    for port in range(20000, 21000):
+        if not net_is_used(port):
+            store = paddle.fluid.core.TCPStore("127.0.0.1", port, is_master,
+                                               nranks)
+            group = core.ProcessGroupNCCL(store, rank, nranks)
+            return group
 
 class LinearModel(nn.Layer):
...
python/paddle/fluid/tests/unittests/test_egr_python_api.py
@@ -52,7 +52,7 @@ class EagerScaleTestCase(unittest.TestCase):
         out_eager = core.eager.scale(data_eager, 1.0, 0.9, True, True)
         self.assertIsNone(data_eager.grad)
         out_eager.backward(grad_eager, False)
-        self.assertTrue(data_eager.grad._is_initialized())
+        self.assertIsNotNone(data_eager.grad)
         self.assertTrue(np.array_equal(data_eager.grad.numpy(), input_data))
 
     def test_retain_grad_and_run_backward_raises(self):
...
python/paddle/fluid/tests/unittests/test_imperative_double_grad.py
浏览文件 @
4be77e53
# Copyright (c) 202
0
PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 202
2
PaddlePaddle Authors. All Rights Reserved.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# you may not use this file except in compliance with the License.
...
@@ -19,6 +19,9 @@ from paddle.vision.models import resnet50, resnet101
...
@@ -19,6 +19,9 @@ from paddle.vision.models import resnet50, resnet101
import
unittest
import
unittest
from
unittest
import
TestCase
from
unittest
import
TestCase
import
numpy
as
np
import
numpy
as
np
import
paddle.compat
as
cpt
from
paddle.fluid.framework
import
_test_eager_guard
import
paddle.fluid.core
as
core
def
_dygraph_guard_
(
func
):
def
_dygraph_guard_
(
func
):
...
@@ -40,6 +43,80 @@ def random_var(size, low=-1, high=1, dtype='float32'):
...
@@ -40,6 +43,80 @@ def random_var(size, low=-1, high=1, dtype='float32'):
return
fluid
.
dygraph
.
to_variable
(
x_np
)
return
fluid
.
dygraph
.
to_variable
(
x_np
)
class
TestEagerGrad
(
TestCase
):
def
func_simple_example_eager_grad
(
self
):
np
.
random
.
seed
(
2021
)
paddle
.
set_device
(
'cpu'
)
np_x
=
np
.
random
.
random
((
3
,
3
))
np_y
=
np
.
random
.
random
((
3
,
1
))
x
=
paddle
.
to_tensor
(
np_x
,
dtype
=
"float64"
,
stop_gradient
=
False
)
y
=
paddle
.
to_tensor
(
np_y
,
dtype
=
"float64"
,
stop_gradient
=
False
)
out
=
paddle
.
matmul
(
x
,
y
)
dx
=
fluid
.
dygraph
.
grad
(
out
,
x
)
dout
=
np
.
ones_like
(
np_y
)
expected_dx
=
np
.
matmul
(
dout
,
np
.
transpose
(
np_y
))
# stop_gradient = !create_graph, create_graph default false
self
.
assertEqual
(
dx
[
0
].
stop_gradient
,
True
)
self
.
assertTrue
(
np
.
allclose
(
dx
[
0
].
numpy
(),
expected_dx
[
0
]))
def
test_simple_example_eager_grad
(
self
):
with
_test_eager_guard
():
self
.
func_simple_example_eager_grad
()
self
.
func_simple_example_eager_grad
()
def
func_simple_example_eager_grad_allow_unused
(
self
):
np
.
random
.
seed
(
2021
)
paddle
.
set_device
(
'cpu'
)
np_x
=
np
.
random
.
random
((
3
,
3
))
np_y
=
np
.
random
.
random
((
3
,
1
))
np_z
=
np
.
random
.
random
((
3
,
1
))
x
=
paddle
.
to_tensor
(
np_x
,
dtype
=
"float64"
,
stop_gradient
=
False
)
y
=
paddle
.
to_tensor
(
np_y
,
dtype
=
"float64"
,
stop_gradient
=
False
)
z
=
paddle
.
to_tensor
(
np_z
,
dtype
=
"float64"
,
stop_gradient
=
False
)
out_z
=
paddle
.
nn
.
functional
.
sigmoid
(
z
)
out
=
paddle
.
matmul
(
x
,
y
)
dx
=
fluid
.
dygraph
.
grad
(
out
,
[
x
,
z
],
allow_unused
=
True
)
dout
=
np
.
ones_like
(
np_y
)
expected_dx
=
np
.
matmul
(
dout
,
np
.
transpose
(
np_y
))
self
.
assertTrue
(
np
.
allclose
(
dx
[
0
].
numpy
(),
expected_dx
[
0
]))
# stop_gradient = !create_graph, create_graph default false
self
.
assertEqual
(
dx
[
0
].
stop_gradient
,
True
)
# x is unused input in the graph
self
.
assertEqual
(
dx
[
1
],
None
)
def
test_simple_example_eager_grad_allow_unused
(
self
):
with
_test_eager_guard
():
self
.
func_simple_example_eager_grad_allow_unused
()
self
.
func_simple_example_eager_grad_allow_unused
()
def
func_simple_example_eager_grad_not_allow_unused
(
self
):
np
.
random
.
seed
(
2021
)
paddle
.
set_device
(
'cpu'
)
np_x
=
np
.
random
.
random
((
3
,
3
))
np_y
=
np
.
random
.
random
((
3
,
1
))
np_z
=
np
.
random
.
random
((
3
,
1
))
x
=
paddle
.
to_tensor
(
np_x
,
dtype
=
"float64"
,
stop_gradient
=
False
)
y
=
paddle
.
to_tensor
(
np_y
,
dtype
=
"float64"
,
stop_gradient
=
False
)
z
=
paddle
.
to_tensor
(
np_z
,
dtype
=
"float64"
,
stop_gradient
=
False
)
out_z
=
paddle
.
nn
.
functional
.
sigmoid
(
z
)
out
=
paddle
.
matmul
(
x
,
y
)
try
:
# allow_unused is false in default
dx
=
fluid
.
dygraph
.
grad
(
out
,
[
x
,
z
])
except
ValueError
as
e
:
error_msg
=
cpt
.
get_exception_message
(
e
)
assert
error_msg
.
find
(
"allow_unused"
)
>
0
def
test_simple_example_eager_grad_not_allow_unused
(
self
):
with
_test_eager_guard
():
self
.
func_simple_example_eager_grad_not_allow_unused
()
self
.
func_simple_example_eager_grad_not_allow_unused
()
class
TestDygraphDoubleGrad
(
TestCase
):
class
TestDygraphDoubleGrad
(
TestCase
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
sort_sum_gradient
=
False
self
.
sort_sum_gradient
=
False
...
@@ -64,7 +141,7 @@ class TestDygraphDoubleGrad(TestCase):
...
@@ -64,7 +141,7 @@ class TestDygraphDoubleGrad(TestCase):
allow_unused
=
allow_unused
)
allow_unused
=
allow_unused
)
@
dygraph_guard
@
dygraph_guard
def
test
_exception
(
self
):
def
func
_exception
(
self
):
with
self
.
assertRaises
(
AssertionError
):
with
self
.
assertRaises
(
AssertionError
):
self
.
grad
(
None
,
None
)
self
.
grad
(
None
,
None
)
...
@@ -93,8 +170,13 @@ class TestDygraphDoubleGrad(TestCase):
...
@@ -93,8 +170,13 @@ class TestDygraphDoubleGrad(TestCase):
with
self
.
assertRaises
(
AssertionError
):
with
self
.
assertRaises
(
AssertionError
):
self
.
grad
([
random_var
(
shape
)],
[
random_var
(
shape
)],
no_grad_vars
=
1
)
self
.
grad
([
random_var
(
shape
)],
[
random_var
(
shape
)],
no_grad_vars
=
1
)
def
test_exception
(
self
):
with
_test_eager_guard
():
self
.
func_exception
()
self
.
func_exception
()
@
dygraph_guard
@
dygraph_guard
def
test
_simple_example
(
self
):
def
func
_simple_example
(
self
):
x
=
random_var
(
self
.
shape
)
x
=
random_var
(
self
.
shape
)
x
.
stop_gradient
=
False
x
.
stop_gradient
=
False
y
=
x
+
1
y
=
x
+
1
...
@@ -123,8 +205,44 @@ class TestDygraphDoubleGrad(TestCase):
...
@@ -123,8 +205,44 @@ class TestDygraphDoubleGrad(TestCase):
self
.
assertNotEqual
(
grad_with_none_and_not_none
.
stop_gradient
,
self
.
assertNotEqual
(
grad_with_none_and_not_none
.
stop_gradient
,
create_graph
)
create_graph
)
def
test_simple_example
(
self
):
with
_test_eager_guard
():
self
.
func_simple_example
()
self
.
func_simple_example
()
@
dygraph_guard
@
dygraph_guard
def
test_none_one_initial_gradient
(
self
):
def
func_example_no_grad_vars
(
self
):
x
=
random_var
(
self
.
shape
)
x_np
=
x
.
numpy
()
numel
=
x_np
.
size
x
.
stop_gradient
=
False
y1
=
fluid
.
layers
.
relu
(
x
)
y2
=
fluid
.
layers
.
relu
(
x
)
z
=
y1
+
y2
w
=
z
*
z
w_mean
=
fluid
.
layers
.
reduce_mean
(
w
)
del
y1
,
z
,
w
dx_actual
,
=
self
.
grad
(
[
w_mean
],
[
x
],
create_graph
=
True
,
no_grad_vars
=
[
y2
])
self
.
assertFalse
(
y2
.
stop_gradient
)
self
.
assertFalse
(
dx_actual
.
stop_gradient
)
dx_expected
=
(
1.0
/
float
(
numel
)
*
(
np
.
maximum
(
x_np
,
0
)
+
y2
.
numpy
())
*
(
x_np
>
0
)
*
2
).
astype
(
'float32'
)
self
.
assertTrue
(
np
.
allclose
(
dx_actual
.
numpy
(),
dx_expected
))
def
test_example_no_grad_vars
(
self
):
with
_test_eager_guard
():
self
.
func_example_no_grad_vars
()
self
.
func_example_no_grad_vars
()
@
dygraph_guard
def
func_none_one_initial_gradient
(
self
):
numel
=
1
numel
=
1
for
s
in
self
.
shape
:
for
s
in
self
.
shape
:
numel
*=
s
numel
*=
s
...
@@ -190,8 +308,13 @@ class TestDygraphDoubleGrad(TestCase):
...
@@ -190,8 +308,13 @@ class TestDygraphDoubleGrad(TestCase):
np
.
array_equal
(
grad_z
.
numpy
(),
np
.
array_equal
(
grad_z
.
numpy
(),
original_random_grad_z
))
original_random_grad_z
))
def
test_none_one_initial_gradient
(
self
):
with
_test_eager_guard
():
self
.
func_none_one_initial_gradient
()
self
.
func_none_one_initial_gradient
()
@
dygraph_guard
@
dygraph_guard
def
test
_example_with_gradient_accumulation_and_create_graph
(
self
):
def
func
_example_with_gradient_accumulation_and_create_graph
(
self
):
x
=
random_var
(
self
.
shape
)
x
=
random_var
(
self
.
shape
)
x_np
=
x
.
numpy
()
x_np
=
x
.
numpy
()
numel
=
x_np
.
size
numel
=
x_np
.
size
...
@@ -214,25 +337,33 @@ class TestDygraphDoubleGrad(TestCase):
...
@@ -214,25 +337,33 @@ class TestDygraphDoubleGrad(TestCase):
(
x_np
>
0
)
*
2
).
astype
(
'float32'
)
(
x_np
>
0
)
*
2
).
astype
(
'float32'
)
self
.
assertTrue
(
np
.
allclose
(
dx_actual
.
numpy
(),
dx_expected
))
self
.
assertTrue
(
np
.
allclose
(
dx_actual
.
numpy
(),
dx_expected
))
loss
=
fluid
.
layers
.
reduce_mean
(
dx_actual
*
dx_actual
+
x
*
x
)
if
core
.
_in_eager_mode
():
loss
.
backward
(
retain_graph
=
True
)
pass
else
:
x_grad_actual
=
x
.
gradient
()
loss
=
fluid
.
layers
.
reduce_mean
(
dx_actual
*
dx_actual
+
x
*
x
)
x_grad_expected
=
(
2.0
/
float
(
numel
)
*
(
x_np
+
dx_expected
*
(
x_np
>
0
)
*
2
/
float
(
numel
))).
astype
(
'float32'
)
self
.
assertTrue
(
np
.
allclose
(
x_grad_actual
,
x_grad_expected
))
for
i
in
range
(
5
):
loss
.
backward
(
retain_graph
=
True
)
loss
.
backward
(
retain_graph
=
True
)
x_grad_actual
=
x
.
gradient
()
x_grad_actual
=
x
.
gradient
()
x_grad_expected
=
(
i
+
2
)
*
(
2.0
/
float
(
numel
)
*
(
x_grad_expected
=
(
2.0
/
float
(
numel
)
*
(
x_np
+
dx_expected
*
x_np
+
dx_expected
*
(
x_np
>
0
)
*
2
/
float
(
numel
))).
astype
(
'float32'
)
(
x_np
>
0
)
*
2
/
float
(
numel
))).
astype
(
'float32'
)
self
.
assertTrue
(
np
.
allclose
(
x_grad_actual
,
x_grad_expected
))
self
.
assertTrue
(
np
.
allclose
(
x_grad_actual
,
x_grad_expected
))
for
i
in
range
(
5
):
loss
.
backward
(
retain_graph
=
True
)
x_grad_actual
=
x
.
gradient
()
x_grad_expected
=
(
i
+
2
)
*
(
2.0
/
float
(
numel
)
*
(
x_np
+
dx_expected
*
(
x_np
>
0
)
*
2
/
float
(
numel
))).
astype
(
'float32'
)
self
.
assertTrue
(
np
.
allclose
(
x_grad_actual
,
x_grad_expected
))
+    def test_example_with_gradient_accumulation_and_create_graph(self):
+        with _test_eager_guard():
+            self.func_example_with_gradient_accumulation_and_create_graph()
+        self.func_example_with_gradient_accumulation_and_create_graph()
+
     @dygraph_guard
-    def test_example_with_gradient_accumulation_and_no_grad_vars(self):
+    def func_example_with_gradient_accumulation_and_no_grad_vars(self):
         x = random_var(self.shape)
         x_np = x.numpy()
         numel = x_np.size
...
@@ -256,17 +387,25 @@ class TestDygraphDoubleGrad(TestCase):
             (x_np > 0) * 2).astype('float32')
         self.assertTrue(np.allclose(dx_actual.numpy(), dx_expected))

         loss = fluid.layers.reduce_mean(dx_actual * dx_actual + x * x)
-        loss.backward()
-
-        x_grad_actual = x.gradient()
-        x_grad_expected = (2.0 / float(numel) * (
-            x_np + dx_expected *
-            (x_np > 0) * 4 / float(numel))).astype('float32')
-        self.assertTrue(np.allclose(x_grad_actual, x_grad_expected))
+        if core._in_eager_mode():
+            pass
+        else:
+            loss.backward()
+
+            x_grad_actual = x.gradient()
+            x_grad_expected = (2.0 / float(numel) * (
+                x_np + dx_expected *
+                (x_np > 0) * 4 / float(numel))).astype('float32')
+            self.assertTrue(np.allclose(x_grad_actual, x_grad_expected))
+    def test_example_with_gradient_accumulation_and_no_grad_vars(self):
+        with _test_eager_guard():
+            self.func_example_with_gradient_accumulation_and_no_grad_vars()
+        self.func_example_with_gradient_accumulation_and_no_grad_vars()
+
     @dygraph_guard
-    def test_example_with_gradient_accumulation_and_not_create_graph(self):
+    def func_example_with_gradient_accumulation_and_not_create_graph(self):
         x = random_var(self.shape)
         x_np = x.numpy()
         numel = x_np.size
...
@@ -289,12 +428,20 @@ class TestDygraphDoubleGrad(TestCase):
         self.assertTrue(np.allclose(dx_actual.numpy(), dx_expected))

         loss = fluid.layers.reduce_mean(dx_actual * dx_actual + x * x)
-        loss.backward()
-
-        x_grad_actual = x.gradient()
-        x_grad_expected = (2.0 * x_np / float(numel)).astype('float32')
-        self.assertTrue(np.allclose(x_grad_actual, x_grad_expected))
+        if core._in_eager_mode():
+            pass
+        else:
+            loss.backward()
+
+            x_grad_actual = x.gradient()
+            x_grad_expected = (2.0 * x_np / float(numel)).astype('float32')
+            self.assertTrue(np.allclose(x_grad_actual, x_grad_expected))
+    def test_example_with_gradient_accumulation_and_not_create_graph(self):
+        with _test_eager_guard():
+            self.func_example_with_gradient_accumulation_and_not_create_graph()
+        self.func_example_with_gradient_accumulation_and_not_create_graph()
+
 class TestDygraphDoubleGradSortGradient(TestDygraphDoubleGrad):
...
@@ -304,7 +451,7 @@ class TestDygraphDoubleGradSortGradient(TestDygraphDoubleGrad):
 class TestDygraphDoubleGradVisitedUniq(TestCase):
-    def test_compare(self):
+    def func_compare(self):
         value = np.random.uniform(-0.5, 0.5, 100).reshape(10, 2,
                                                           5).astype("float32")
...
@@ -349,6 +496,11 @@ class TestDygraphDoubleGradVisitedUniq(TestCase):
         self.assertTrue(np.array_equal(grad_1, grad_2))

+    def test_compare(self):
+        with _test_eager_guard():
+            self.func_compare()
+        self.func_compare()
+
 class TestRaiseNoDoubleGradOp(TestCase):
     def raise_no_grad_op(self):
...
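The tests above repeatedly call self.grad(...) with create_graph and no_grad_vars, which wraps the dygraph gradient API. A minimal standalone sketch of the equivalent paddle.grad call pattern (the tensors and names here are illustrative, not taken from this diff):

    import numpy as np
    import paddle

    x = paddle.to_tensor(np.random.rand(2, 3).astype('float32'), stop_gradient=False)
    y = paddle.nn.functional.relu(x)
    loss = paddle.mean(y * y)

    # create_graph=True keeps the backward graph, so dx itself stays
    # differentiable and can feed a second loss (the double-grad case above).
    dx, = paddle.grad([loss], [x], create_graph=True)

    second = paddle.mean(dx * dx + x * x)
    second.backward()
    print(x.gradient().shape)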
python/paddle/fluid/tests/unittests/test_paddle_imperative_double_grad.py
View file @ 4be77e53

-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
...
@@ -18,6 +18,8 @@ import unittest
 from unittest import TestCase
 import numpy as np
 import paddle
+from paddle.fluid.framework import _test_eager_guard
+import paddle.fluid.core as core


 def _dygraph_guard_(func):
...
@@ -62,7 +64,7 @@ class TestDygraphDoubleGrad(TestCase):
             allow_unused=allow_unused)

     @dygraph_guard
-    def test_exception(self):
+    def func_exception(self):
         with self.assertRaises(AssertionError):
             self.grad(None, None)
...
@@ -91,8 +93,13 @@ class TestDygraphDoubleGrad(TestCase):
         with self.assertRaises(AssertionError):
             self.grad(
                 [random_var(shape)], [random_var(shape)], no_grad_vars=1)

+    def test_exception(self):
+        with _test_eager_guard():
+            self.func_exception()
+        self.func_exception()
+
     @dygraph_guard
-    def test_simple_example(self):
+    def func_simple_example(self):
         x = random_var(self.shape)
         x.stop_gradient = False
         y = x + 1
...
@@ -121,8 +128,13 @@ class TestDygraphDoubleGrad(TestCase):
             self.assertNotEqual(grad_with_none_and_not_none.stop_gradient,
                                 create_graph)

+    def test_simple_example(self):
+        with _test_eager_guard():
+            self.func_simple_example()
+        self.func_simple_example()
+
     @dygraph_guard
-    def test_none_one_initial_gradient(self):
+    def func_none_one_initial_gradient(self):
         numel = 1
         for s in self.shape:
             numel *= s
...
@@ -188,8 +200,13 @@ class TestDygraphDoubleGrad(TestCase):
                     np.array_equal(grad_z.numpy(),
                                    original_random_grad_z))

+    def test_none_one_initial_gradient(self):
+        with _test_eager_guard():
+            self.func_none_one_initial_gradient()
+        self.func_none_one_initial_gradient()
+
     @dygraph_guard
-    def test_example_with_gradient_accumulation_and_create_graph(self):
+    def func_example_with_gradient_accumulation_and_create_graph(self):
         x = random_var(self.shape)
         x_np = x.numpy()
         numel = x_np.size
...
@@ -212,17 +229,25 @@ class TestDygraphDoubleGrad(TestCase):
             (x_np > 0) * 2).astype('float32')
         self.assertTrue(np.allclose(dx_actual.numpy(), dx_expected))

         loss = fluid.layers.reduce_mean(dx_actual * dx_actual + x * x)
-        loss.backward()
-
-        x_grad_actual = x.gradient()
-        x_grad_expected = (2.0 / float(numel) * (
-            x_np + dx_expected *
-            (x_np > 0) * 2 / float(numel))).astype('float32')
-        self.assertTrue(np.allclose(x_grad_actual, x_grad_expected))
+        if core._in_eager_mode():
+            pass
+        else:
+            loss.backward()
+
+            x_grad_actual = x.gradient()
+            x_grad_expected = (2.0 / float(numel) * (
+                x_np + dx_expected *
+                (x_np > 0) * 2 / float(numel))).astype('float32')
+            self.assertTrue(np.allclose(x_grad_actual, x_grad_expected))
+    def test_example_with_gradient_accumulation_and_create_graph(self):
+        with _test_eager_guard():
+            self.func_example_with_gradient_accumulation_and_create_graph()
+        self.func_example_with_gradient_accumulation_and_create_graph()
+
     @dygraph_guard
-    def test_example_with_gradient_accumulation_and_no_grad_vars(self):
+    def func_example_with_gradient_accumulation_and_no_grad_vars(self):
         x = random_var(self.shape)
         x_np = x.numpy()
         numel = x_np.size
...
@@ -246,17 +271,25 @@ class TestDygraphDoubleGrad(TestCase):
             (x_np > 0) * 2).astype('float32')
         self.assertTrue(np.allclose(dx_actual.numpy(), dx_expected))

         loss = fluid.layers.reduce_mean(dx_actual * dx_actual + x * x)
-        loss.backward()
-
-        x_grad_actual = x.gradient()
-        x_grad_expected = (2.0 / float(numel) * (
-            x_np + dx_expected *
-            (x_np > 0) * 4 / float(numel))).astype('float32')
-        self.assertTrue(np.allclose(x_grad_actual, x_grad_expected))
+        if core._in_eager_mode():
+            pass
+        else:
+            loss.backward()
+
+            x_grad_actual = x.gradient()
+            x_grad_expected = (2.0 / float(numel) * (
+                x_np + dx_expected *
+                (x_np > 0) * 4 / float(numel))).astype('float32')
+            self.assertTrue(np.allclose(x_grad_actual, x_grad_expected))
+
+    def test_example_with_gradient_accumulation_and_no_grad_vars(self):
+        with _test_eager_guard():
+            self.func_example_with_gradient_accumulation_and_no_grad_vars()
+        self.func_example_with_gradient_accumulation_and_no_grad_vars()
     @dygraph_guard
-    def test_example_with_gradient_accumulation_and_not_create_graph(self):
+    def func_example_with_gradient_accumulation_and_not_create_graph(self):
         x = random_var(self.shape)
         x_np = x.numpy()
         numel = x_np.size
...
@@ -279,12 +312,20 @@ class TestDygraphDoubleGrad(TestCase):
         self.assertTrue(np.allclose(dx_actual.numpy(), dx_expected))

         loss = fluid.layers.reduce_mean(dx_actual * dx_actual + x * x)
-        loss.backward()
-
-        x_grad_actual = x.gradient()
-        x_grad_expected = (2.0 * x_np / float(numel)).astype('float32')
-        self.assertTrue(np.allclose(x_grad_actual, x_grad_expected))
+        if core._in_eager_mode():
+            pass
+        else:
+            loss.backward()
+
+            x_grad_actual = x.gradient()
+            x_grad_expected = (2.0 * x_np / float(numel)).astype('float32')
+            self.assertTrue(np.allclose(x_grad_actual, x_grad_expected))
+
+    def test_example_with_gradient_accumulation_and_not_create_graph(self):
+        with _test_eager_guard():
+            self.func_example_with_gradient_accumulation_and_not_create_graph()
+        self.func_example_with_gradient_accumulation_and_not_create_graph()
+
 class TestDygraphDoubleGradSortGradient(TestDygraphDoubleGrad):
...
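Both test files are refactored the same way: each original test body becomes a func_* method, and a new test_* wrapper runs it twice, once inside _test_eager_guard() and once in legacy dygraph mode. A schematic sketch of that pattern with placeholder names (not code added to the Paddle test suite):

    from unittest import TestCase
    from paddle.fluid.framework import _test_eager_guard

    class ExampleCase(TestCase):
        def func_some_check(self):
            # original test body; must pass under both execution modes
            pass

        def test_some_check(self):
            with _test_eager_guard():
                self.func_some_check()   # eager (final-state) mode
            self.func_some_check()       # legacy dygraph mode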
python/paddle/static/input.py
View file @ 4be77e53
...
@@ -193,7 +193,7 @@ class InputSpec(object):
             print(x_spec)  # InputSpec(shape=(2, 2), dtype=VarType.FP32, name=x)
         """
-        if isinstance(tensor, (Variable, core.VarBase)):
+        if isinstance(tensor, (Variable, core.VarBase, core.eager.Tensor)):
             return cls(tensor.shape, tensor.dtype, name or tensor.name)
         else:
             raise ValueError(
...
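With core.eager.Tensor added to the isinstance check, InputSpec.from_tensor also accepts tensors created under eager mode. A small usage sketch (assumes dygraph execution is active; the printed repr is illustrative):

    import numpy as np
    import paddle
    from paddle.static import InputSpec

    x = paddle.to_tensor(np.ones([2, 2], dtype='float32'))
    x_spec = InputSpec.from_tensor(x, name='x')
    print(x_spec)  # e.g. InputSpec(shape=(2, 2), dtype=paddle.float32, name=x)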
python/paddle/utils/code_gen/api_base.py
View file @ 4be77e53
...
@@ -698,7 +698,7 @@ PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name() + '_'}({self
             self.outputs['types'], 'SetKernelOutput', code_indent, inplace_flag)
         api_func_name = self.get_api_func_name() + ('_' if inplace_flag else '')
         return f"""
-{code_indent}  auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+{code_indent}  const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
{code_indent}      "{self.kernel['func'][0]}", {{kernel_backend, kernel_layout, kernel_data_type}});
{code_indent}  VLOG(6) << "{self.api} API kernel key: [" << kernel_backend << ", " << kernel_layout << ", "<< kernel_data_type << "]";
{code_indent}  VLOG(6) << "{self.api} API kernel: " << kernel;
...
tools/infrt/custom_pdop.td
View file @ 4be77e53
...
@@ -23,16 +23,6 @@ def PD_FetchOp : PD_Op<"fetch", [Terminator]> {
   let arguments = (ins PD_Tensor :$inputs, StrAttr:$name);
 }

-def PD_ReturnOp : PD_Op<"return", [Terminator]> {
-  let summary = "return Op";
-
-  let description = [{
-    Fetch tensor from the graph.
-  }];
-
-  let arguments = (ins Variadic<PD_Tensor>:$inputs);
-}
-
 def PD_GraphOp : PD_Op<"graph", [SingleBlockImplicitTerminator<"::infrt::ReturnOp">]> {
   let summary = "paddle graph Op";
   let description = [{
...
tools/infrt/generate_pd_op_dialect_from_paddle_op_maker.py
View file @ 4be77e53
...
@@ -16,8 +16,6 @@ import paddle.fluid.framework as framework
 from paddle.fluid import core
 from paddle import compat as cpt

-ops_having_canonicalization = {"elementwise_add", }

 # collect original ops: op which has both inference and grid defination
 def get_original_ops():
...
@@ -186,7 +184,7 @@ def generate_all_ops_inputs_outputs_map(op_descs):
     cpp_style_ops_outputs_map_str = start_ + ops_outputs_str + "\n};"

     # 3. Write to header file
-    dst_head_file = "../../paddle/infrt/dialect/pd_ops_info.h"
+    dst_head_file = "../../paddle/infrt/dialect/pd/common/pd_ops_info.h"
     with open(dst_head_file, 'w') as ops_inputs_outputs_head_file:
         ops_inputs_outputs_head_file.write(cpp_style_ops_inputs_map_str)
         ops_inputs_outputs_head_file.write("\n\n")
...
@@ -195,7 +193,7 @@ def generate_all_ops_inputs_outputs_map(op_descs):
 # funtion to generate paddle op dialect file
 def convert_op_proto_into_mlir(op_descs):
-    dst_dialect_file = "../../paddle/infrt/dialect/pd_ops.td"
+    dst_dialect_file = "../../paddle/infrt/dialect/pd/ir/pd_ops.td"
     custom_dialect_file = "custom_pdop.td"

     # 1. Head files
...
@@ -214,7 +212,7 @@ def convert_op_proto_into_mlir(op_descs):
"include
\"
mlir/Interfaces/InferTypeOpInterface.td
\"
"
,
"include
\"
mlir/Interfaces/InferTypeOpInterface.td
\"
"
,
"include
\"
mlir/Interfaces/LoopLikeInterface.td
\"
"
,
"include
\"
mlir/Interfaces/LoopLikeInterface.td
\"
"
,
"include
\"
mlir/IR/OpBase.td
\"
"
,
"include
\"
mlir/IR/OpBase.td
\"
"
,
"include
\"
paddle/infrt/dialect/pd_op_base.td
\"
"
,
"include
\"
paddle/infrt/dialect/pd
/ir/pd
_op_base.td
\"
"
,
""
,
""
,
]
]
...
@@ -245,7 +243,6 @@ def convert_op_proto_into_mlir(op_descs):
...
@@ -245,7 +243,6 @@ def convert_op_proto_into_mlir(op_descs):
             op_type=op_type,
             left_brace="{")
         SUMMARY = '  let summary = "{} op";\n'.format(op_type)
-        CANONICALIZATION = "let hasCanonicalizer = 1;" if op_type in ops_having_canonicalization else ""

         # 2.2 Description
         contents = ""
...
@@ -348,7 +345,6 @@ def convert_op_proto_into_mlir(op_descs):
         ops_mlir_file.write(DESCRIPTION)
         ops_mlir_file.write(ARGUMENTS)
         ops_mlir_file.write(RESULTS)
-        ops_mlir_file.write(CANONICALIZATION)
         ops_mlir_file.write("}\n")

     print("Skipped ops num: " + str(len(skipped_op_list)))
...
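The generator lines removed above emitted "let hasCanonicalizer = 1;" only for op types listed in ops_having_canonicalization. A minimal standalone sketch of that conditional-emission pattern (the op list and output format here are illustrative, not the full generator):

    ops_having_canonicalization = {"elementwise_add"}

    def op_definition(op_type):
        summary = '  let summary = "{} op";\n'.format(op_type)
        canonicalization = ("  let hasCanonicalizer = 1;\n"
                            if op_type in ops_having_canonicalization else "")
        return "def PD_{0}Op {{\n{1}{2}}}\n".format(op_type, summary, canonicalization)

    print(op_definition("elementwise_add"))  # includes the canonicalizer flag
    print(op_definition("relu"))             # omits it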