Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
f5d9981e
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
f5d9981e
编写于
8月 29, 2023
作者:
Z
zhangbo9674
提交者:
GitHub
8月 29, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Revert "[NewIR]Fix new ir output dtype bug (#56620)" (#56739)
This reverts commit
1409e4ec
.
上级
138bdf40
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
122 addition
and
86 deletion
+122
-86
paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc
paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc
+120
-67
paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
+2
-0
test/ir/new_ir/test_standalone_new_ir.py
test/ir/new_ir/test_standalone_new_ir.py
+0
-19
未找到文件。
paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc
浏览文件 @
f5d9981e
...
...
@@ -209,46 +209,6 @@ ir::OpResult AddPlaceTransferOp(ir::OpResult in,
}
}
ir
::
Type
BuildOutputType
(
ir
::
Type
type
,
phi
::
Place
place
,
phi
::
DataType
data_type
,
ir
::
IrContext
*
ctx
)
{
if
(
type
.
isa
<
dialect
::
DenseTensorType
>
())
{
auto
dense_tensor_type
=
type
.
dyn_cast
<
dialect
::
DenseTensorType
>
();
auto
out_dtype
=
dense_tensor_type
.
dtype
();
if
(
data_type
!=
phi
::
DataType
::
UNDEFINED
)
{
out_dtype
=
TransToIrDataType
(
data_type
,
ctx
);
}
return
dialect
::
AllocatedDenseTensorType
::
get
(
ctx
,
place
,
out_dtype
,
dense_tensor_type
.
dims
(),
dense_tensor_type
.
data_layout
(),
dense_tensor_type
.
lod
(),
dense_tensor_type
.
offset
());
}
else
if
(
type
.
isa
<
dialect
::
SelectedRowsType
>
())
{
auto
selected_rows_type
=
type
.
dyn_cast
<
dialect
::
SelectedRowsType
>
();
auto
out_dtype
=
selected_rows_type
.
dtype
();
if
(
data_type
!=
phi
::
DataType
::
UNDEFINED
)
{
out_dtype
=
TransToIrDataType
(
data_type
,
ctx
);
}
return
dialect
::
AllocatedSelectedRowsType
::
get
(
ctx
,
place
,
out_dtype
,
selected_rows_type
.
dims
(),
selected_rows_type
.
data_layout
(),
selected_rows_type
.
lod
(),
selected_rows_type
.
offset
());
}
else
{
PADDLE_THROW
(
phi
::
errors
::
Unimplemented
(
"BuildOutputType only support DenseTensorType and SelectedRowsType"
));
}
}
phi
::
DataType
GetKernelDataTypeByYamlInfo
(
const
ir
::
Operation
*
op
,
const
std
::
unordered_map
<
ir
::
Value
,
ir
::
OpResult
>&
map_value_pair
,
...
...
@@ -560,7 +520,6 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
std
::
vector
<
phi
::
Place
>
out_places
;
// Copy op inputs
std
::
vector
<
ir
::
OpResult
>
vec_inputs
;
std
::
vector
<
ir
::
Type
>
vec_inner_types
;
if
(
op_item
->
num_operands
()
>
0
)
{
for
(
size_t
i
=
0
;
i
<
op_item
->
num_operands
();
++
i
)
{
auto
cur_in
=
op_item
->
operand_source
(
i
);
...
...
@@ -576,7 +535,6 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
op_item
->
name
()));
auto
new_in
=
map_value_pair
.
at
(
cur_in
);
vec_inputs
.
push_back
(
new_in
);
vec_inner_types
.
push_back
(
new_in
.
type
());
if
(
new_in
.
type
().
isa
<
paddle
::
dialect
::
AllocatedDenseTensorType
>
())
{
out_places
.
push_back
(
new_in
.
type
()
...
...
@@ -590,9 +548,49 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
}
// Copy op output type
std
::
vector
<
ir
::
Type
>
op_output_types
;
ir
::
Type
t1
=
ir
::
VectorType
::
get
(
ctx
,
vec_inner_types
);
op_output_types
.
push_back
(
t1
);
if
(
op_item
->
num_results
()
>
0
)
{
for
(
size_t
i
=
0
;
i
<
op_item
->
num_results
();
++
i
)
{
auto
result_type
=
op_item
->
result
(
i
).
type
();
if
(
!
result_type
)
{
op_output_types
.
push_back
(
result_type
);
}
else
if
(
result_type
.
isa
<
ir
::
VectorType
>
())
{
std
::
vector
<
ir
::
Type
>
vec_inner_types
;
auto
base_types
=
result_type
.
dyn_cast
<
ir
::
VectorType
>
().
data
();
for
(
size_t
idx
=
0
;
idx
<
base_types
.
size
();
idx
++
)
{
auto
&
base_type
=
base_types
[
idx
];
if
(
base_type
)
{
if
(
base_type
.
isa
<
dialect
::
DenseTensorType
>
())
{
auto
allocated_dense_tensor_dtype
=
paddle
::
dialect
::
AllocatedDenseTensorType
::
get
(
ctx
,
out_places
[
idx
],
base_type
.
dyn_cast
<
dialect
::
DenseTensorType
>
());
vec_inner_types
.
push_back
(
allocated_dense_tensor_dtype
);
}
else
{
PADDLE_THROW
(
phi
::
errors
::
Unimplemented
(
"only support dense tensor in vector type for now"
));
}
}
else
{
// NOTE(phlrain), kernel not support a nullptr in output
ir
::
Type
fp32_dtype
=
ir
::
Float32Type
::
get
(
ctx
);
phi
::
DDim
dims
=
{};
phi
::
DataLayout
data_layout
=
phi
::
DataLayout
::
NCHW
;
phi
::
LoD
lod
=
{{}};
size_t
offset
=
0
;
auto
dense_tensor_dtype
=
paddle
::
dialect
::
DenseTensorType
::
get
(
ctx
,
fp32_dtype
,
dims
,
data_layout
,
lod
,
offset
);
vec_inner_types
.
push_back
(
dense_tensor_dtype
);
}
}
ir
::
Type
t1
=
ir
::
VectorType
::
get
(
ctx
,
vec_inner_types
);
op_output_types
.
push_back
(
t1
);
}
else
{
PADDLE_THROW
(
phi
::
errors
::
Unimplemented
(
"builtin.combine Result type only support "
"VectorType<DenseTensorType>"
));
}
}
}
// Get op info
ir
::
OpInfo
op_info
=
ctx
->
GetRegisteredOpInfo
(
op_item
->
name
());
// Generate new op
...
...
@@ -611,8 +609,9 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
}
if
(
op_item
->
name
()
==
"builtin.slice"
)
{
phi
::
Place
out_place
=
place
;
// Copy op inputs
std
::
vector
<
ir
::
OpResult
>
vec_inputs
;
std
::
vector
<
ir
::
Type
>
op_output_types
;
if
(
op_item
->
num_operands
()
>
0
)
{
for
(
size_t
i
=
0
;
i
<
op_item
->
num_operands
();
++
i
)
{
auto
cur_in
=
op_item
->
operand_source
(
i
);
...
...
@@ -631,18 +630,39 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
if
(
new_in
.
type
().
isa
<
ir
::
VectorType
>
())
{
auto
vec_types
=
new_in
.
type
().
dyn_cast
<
ir
::
VectorType
>
().
data
();
auto
index
=
op_item
->
attributes
()
.
at
(
"index"
)
.
dyn_cast
<
ir
::
Int32Attribute
>
()
.
data
();
op_output_types
.
push_back
(
vec_types
[
index
]);
out_place
=
vec_types
[
op_item
->
attributes
()
.
at
(
"index"
)
.
dyn_cast
<
ir
::
Int32Attribute
>
()
.
data
()]
.
dyn_cast
<
paddle
::
dialect
::
AllocatedDenseTensorType
>
()
.
place
();
}
else
{
PADDLE_THROW
(
phi
::
errors
::
Unimplemented
(
"only support vector type for now"
));
}
}
}
// Copy op output type
std
::
vector
<
ir
::
Type
>
op_output_types
;
if
(
op_item
->
num_results
()
>
0
)
{
for
(
size_t
i
=
0
;
i
<
op_item
->
num_results
();
++
i
)
{
auto
result_type
=
op_item
->
result
(
i
).
type
();
if
(
!
result_type
)
{
op_output_types
.
push_back
(
result_type
);
}
else
if
(
result_type
.
isa
<
dialect
::
DenseTensorType
>
())
{
auto
allocated_dense_tensor_dtype
=
paddle
::
dialect
::
AllocatedDenseTensorType
::
get
(
ctx
,
out_place
,
result_type
.
dyn_cast
<
dialect
::
DenseTensorType
>
());
op_output_types
.
push_back
(
allocated_dense_tensor_dtype
);
}
else
{
PADDLE_THROW
(
phi
::
errors
::
Unimplemented
(
"builtin.slice Result type only support DenseTensorType"
));
}
}
}
// Get op info
ir
::
OpInfo
op_info
=
ctx
->
GetRegisteredOpInfo
(
op_item
->
name
());
// Generate new op
...
...
@@ -664,7 +684,6 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
std
::
vector
<
phi
::
Place
>
out_places
(
op_item
->
num_results
());
// Copy op inputs
std
::
vector
<
ir
::
OpResult
>
vec_inputs
;
std
::
vector
<
ir
::
Type
>
op_output_types
;
if
(
op_item
->
num_operands
()
>
0
)
{
for
(
size_t
i
=
0
;
i
<
op_item
->
num_operands
();
++
i
)
{
auto
cur_in
=
op_item
->
operand_source
(
i
);
...
...
@@ -684,7 +703,10 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
if
(
new_in
.
type
().
isa
<
ir
::
VectorType
>
())
{
auto
vec_types
=
new_in
.
type
().
dyn_cast
<
ir
::
VectorType
>
().
data
();
for
(
uint64_t
idx
=
0
;
idx
<
vec_types
.
size
();
idx
++
)
{
op_output_types
.
push_back
(
vec_types
[
idx
]);
out_places
[
idx
]
=
vec_types
[
idx
]
.
dyn_cast
<
paddle
::
dialect
::
AllocatedDenseTensorType
>
()
.
place
();
}
}
else
{
PADDLE_THROW
(
...
...
@@ -692,7 +714,26 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
}
}
}
// Copy op output type
std
::
vector
<
ir
::
Type
>
op_output_types
;
if
(
op_item
->
num_results
()
>
0
)
{
for
(
size_t
i
=
0
;
i
<
op_item
->
num_results
();
++
i
)
{
auto
result_type
=
op_item
->
result
(
i
).
type
();
if
(
!
result_type
)
{
op_output_types
.
push_back
(
result_type
);
}
else
if
(
result_type
.
isa
<
dialect
::
DenseTensorType
>
())
{
auto
allocated_dense_tensor_dtype
=
paddle
::
dialect
::
AllocatedDenseTensorType
::
get
(
ctx
,
out_places
[
i
],
result_type
.
dyn_cast
<
dialect
::
DenseTensorType
>
());
op_output_types
.
push_back
(
allocated_dense_tensor_dtype
);
}
else
{
PADDLE_THROW
(
phi
::
errors
::
Unimplemented
(
"builtin.split Result type only support DenseTensorType"
));
}
}
}
// Get op info
ir
::
OpInfo
op_info
=
ctx
->
GetRegisteredOpInfo
(
op_item
->
name
());
// Generate new op
...
...
@@ -759,30 +800,36 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
}
for
(
size_t
i
=
0
;
i
<
op_item
->
num_results
();
++
i
)
{
phi
::
Place
out_place
=
phi
::
TransToPhiPlace
(
kernel_key
.
backend
());
phi
::
DataType
out_phi_dtype
=
phi
::
DataType
::
UNDEFINED
;
phi
::
Place
out_place
;
if
((
!
UnchangeOutputOps
.
count
(
op_item
->
name
()))
&&
(
!
IsLegacyOp
(
op_item
->
name
()))
&&
phi_kernel
.
IsValid
())
{
out_place
=
phi
::
TransToPhiPlace
(
output_defs
[
i
].
backend
);
out_phi_dtype
=
output_defs
[
i
].
dtype
;
}
else
{
out_place
=
phi
::
TransToPhiPlace
(
kernel_key
.
backend
());
}
auto
result_type
=
op_item
->
result
(
i
).
type
();
if
(
!
result_type
)
{
op_output_types
.
push_back
(
result_type
);
}
else
if
(
result_type
.
isa
<
dialect
::
DenseTensorType
>
()
||
result_type
.
isa
<
dialect
::
SelectedRowsType
>
())
{
op_output_types
.
push_back
(
BuildOutputType
(
result_type
,
out_place
,
out_phi_dtype
,
ctx
));
}
else
if
(
result_type
.
isa
<
dialect
::
DenseTensorType
>
())
{
auto
allocated_dense_tensor_dtype
=
paddle
::
dialect
::
AllocatedDenseTensorType
::
get
(
ctx
,
out_place
,
result_type
.
dyn_cast
<
dialect
::
DenseTensorType
>
());
op_output_types
.
push_back
(
allocated_dense_tensor_dtype
);
}
else
if
(
result_type
.
isa
<
ir
::
VectorType
>
())
{
std
::
vector
<
ir
::
Type
>
vec_inner_types
;
auto
base_types
=
result_type
.
dyn_cast
<
ir
::
VectorType
>
().
data
();
for
(
auto
&
base_type
:
base_types
)
{
if
(
base_type
)
{
if
(
base_type
.
isa
<
dialect
::
DenseTensorType
>
())
{
vec_inner_types
.
push_back
(
BuildOutputType
(
base_type
,
out_place
,
out_phi_dtype
,
ctx
));
auto
allocated_dense_tensor_dtype
=
paddle
::
dialect
::
AllocatedDenseTensorType
::
get
(
ctx
,
out_place
,
base_type
.
dyn_cast
<
dialect
::
DenseTensorType
>
());
vec_inner_types
.
push_back
(
allocated_dense_tensor_dtype
);
}
else
{
PADDLE_THROW
(
phi
::
errors
::
Unimplemented
(
"only support dense tensor in vector type for now"
));
...
...
@@ -805,10 +852,16 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
ir
::
Type
t1
=
ir
::
VectorType
::
get
(
ctx
,
vec_inner_types
);
op_output_types
.
push_back
(
t1
);
}
else
if
(
result_type
.
isa
<
dialect
::
SelectedRowsType
>
())
{
auto
allocated_selected_rows_dtype
=
paddle
::
dialect
::
AllocatedSelectedRowsType
::
get
(
ctx
,
out_place
,
result_type
.
dyn_cast
<
dialect
::
SelectedRowsType
>
());
op_output_types
.
emplace_back
(
allocated_selected_rows_dtype
);
}
else
{
PADDLE_THROW
(
phi
::
errors
::
Unimplemented
(
"Result type only support DenseTensorType, SelectedRowType and "
"VectorType"
));
"Result type only support DenseTensorType and VectorType"
));
}
}
}
...
...
paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
浏览文件 @
f5d9981e
...
...
@@ -1387,6 +1387,7 @@ PD_REGISTER_KERNEL(batch_norm_grad,
phi
::
dtype
::
float16
)
{
if
(
kernel_key
.
dtype
()
==
phi
::
DataType
::
FLOAT16
||
kernel_key
.
dtype
()
==
phi
::
DataType
::
BFLOAT16
)
{
kernel
->
OutputAt
(
0
).
SetDataType
(
phi
::
DataType
::
FLOAT32
);
// x_grad
kernel
->
OutputAt
(
1
).
SetDataType
(
phi
::
DataType
::
FLOAT32
);
// scale_grad
kernel
->
OutputAt
(
2
).
SetDataType
(
phi
::
DataType
::
FLOAT32
);
// bias_grad
}
...
...
@@ -1404,6 +1405,7 @@ PD_REGISTER_KERNEL(batch_norm_grad,
double
,
phi
::
dtype
::
float16
)
{
if
(
kernel_key
.
dtype
()
==
phi
::
DataType
::
FLOAT16
)
{
kernel
->
OutputAt
(
0
).
SetDataType
(
phi
::
DataType
::
FLOAT32
);
// x_grad
kernel
->
OutputAt
(
1
).
SetDataType
(
phi
::
DataType
::
FLOAT32
);
// scale_grad
kernel
->
OutputAt
(
2
).
SetDataType
(
phi
::
DataType
::
FLOAT32
);
// bias_grad
}
...
...
test/ir/new_ir/test_standalone_new_ir.py
浏览文件 @
f5d9981e
...
...
@@ -345,25 +345,6 @@ class TestNewIrConcatDygraph(unittest.TestCase):
np
.
testing
.
assert_array_equal
(
z
.
numpy
(),
gold_res
)
class
TestNewIrLogicalDygraph
(
unittest
.
TestCase
):
def
test_with_new_ir
(
self
):
paddle
.
disable_static
()
@
paddle
.
jit
.
to_static
def
func
(
x
,
y
,
z
):
a
=
paddle
.
logical_and
(
x
,
y
)
return
z
+
a
.
cast
(
"float32"
)
x
=
paddle
.
ones
([
2
,
2
],
dtype
=
'float32'
)
y
=
paddle
.
ones
([
2
,
2
],
dtype
=
'float32'
)
z
=
paddle
.
ones
([
2
,
2
],
dtype
=
'float32'
)
z
=
func
(
x
,
y
,
z
)
gold_res
=
np
.
ones
([
2
,
2
],
dtype
=
"float32"
)
*
2
np
.
testing
.
assert_array_equal
(
z
.
numpy
(),
gold_res
)
if
__name__
==
"__main__"
:
paddle
.
enable_static
()
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录