diff --git a/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc
index 408c2f092c891afdbcf05828221e06b27feb4cb9..4198098f2bd4fbb65e00f452d043d56b80ae6f55 100644
--- a/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc
+++ b/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc
@@ -209,46 +209,6 @@ ir::OpResult AddPlaceTransferOp(ir::OpResult in,
   }
 }
 
-ir::Type BuildOutputType(ir::Type type,
-                         phi::Place place,
-                         phi::DataType data_type,
-                         ir::IrContext* ctx) {
-  if (type.isa<dialect::DenseTensorType>()) {
-    auto dense_tensor_type = type.dyn_cast<dialect::DenseTensorType>();
-    auto out_dtype = dense_tensor_type.dtype();
-    if (data_type != phi::DataType::UNDEFINED) {
-      out_dtype = TransToIrDataType(data_type, ctx);
-    }
-
-    return dialect::AllocatedDenseTensorType::get(
-        ctx,
-        place,
-        out_dtype,
-        dense_tensor_type.dims(),
-        dense_tensor_type.data_layout(),
-        dense_tensor_type.lod(),
-        dense_tensor_type.offset());
-
-  } else if (type.isa<dialect::SelectedRowsType>()) {
-    auto selected_rows_type = type.dyn_cast<dialect::SelectedRowsType>();
-    auto out_dtype = selected_rows_type.dtype();
-    if (data_type != phi::DataType::UNDEFINED) {
-      out_dtype = TransToIrDataType(data_type, ctx);
-    }
-    return dialect::AllocatedSelectedRowsType::get(
-        ctx,
-        place,
-        out_dtype,
-        selected_rows_type.dims(),
-        selected_rows_type.data_layout(),
-        selected_rows_type.lod(),
-        selected_rows_type.offset());
-  } else {
-    PADDLE_THROW(phi::errors::Unimplemented(
-        "BuildOutputType only support DenseTensorType and SelectedRowsType"));
-  }
-}
-
 phi::DataType GetKernelDataTypeByYamlInfo(
     const ir::Operation* op,
     const std::unordered_map<ir::Value, ir::OpResult>& map_value_pair,
@@ -560,7 +520,6 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
       std::vector<phi::Place> out_places;
       // Copy op inputs
       std::vector<ir::OpResult> vec_inputs;
-      std::vector<ir::Type> vec_inner_types;
       if (op_item->num_operands() > 0) {
         for (size_t i = 0; i < op_item->num_operands(); ++i) {
           auto cur_in = op_item->operand_source(i);
@@ -576,7 +535,6 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
                   op_item->name()));
           auto new_in = map_value_pair.at(cur_in);
           vec_inputs.push_back(new_in);
-          vec_inner_types.push_back(new_in.type());
           if (new_in.type().isa<dialect::AllocatedDenseTensorType>()) {
             out_places.push_back(
                 new_in.type()
@@ -590,9 +548,49 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
       }
       // Copy op output type
       std::vector<ir::Type> op_output_types;
-      ir::Type t1 = ir::VectorType::get(ctx, vec_inner_types);
-      op_output_types.push_back(t1);
-
+      if (op_item->num_results() > 0) {
+        for (size_t i = 0; i < op_item->num_results(); ++i) {
+          auto result_type = op_item->result(i).type();
+          if (!result_type) {
+            op_output_types.push_back(result_type);
+          } else if (result_type.isa<ir::VectorType>()) {
+            std::vector<ir::Type> vec_inner_types;
+            auto base_types = result_type.dyn_cast<ir::VectorType>().data();
+            for (size_t idx = 0; idx < base_types.size(); idx++) {
+              auto& base_type = base_types[idx];
+              if (base_type) {
+                if (base_type.isa<dialect::DenseTensorType>()) {
+                  auto allocated_dense_tensor_dtype =
+                      paddle::dialect::AllocatedDenseTensorType::get(
+                          ctx,
+                          out_places[idx],
+                          base_type.dyn_cast<dialect::DenseTensorType>());
+                  vec_inner_types.push_back(allocated_dense_tensor_dtype);
+                } else {
+                  PADDLE_THROW(phi::errors::Unimplemented(
+                      "only support dense tensor in vector type for now"));
+                }
+              } else {
+                // NOTE(phlrain), kernel not support a nullptr in output
+                ir::Type fp32_dtype = ir::Float32Type::get(ctx);
+                phi::DDim dims = {};
+                phi::DataLayout data_layout = phi::DataLayout::NCHW;
+                phi::LoD lod = {{}};
+                size_t offset = 0;
+                auto dense_tensor_dtype = paddle::dialect::DenseTensorType::get(
+                    ctx, fp32_dtype, dims, data_layout, lod, offset);
+                vec_inner_types.push_back(dense_tensor_dtype);
+              }
+            }
+            ir::Type t1 = ir::VectorType::get(ctx, vec_inner_types);
+            op_output_types.push_back(t1);
+          } else {
+            PADDLE_THROW(phi::errors::Unimplemented(
+                "builtin.combine Result type only support "
+                "VectorType"));
+          }
+        }
+      }
       // Get op info
       ir::OpInfo op_info = ctx->GetRegisteredOpInfo(op_item->name());
       // Generate new op
@@ -611,8 +609,9 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
     }
 
     if (op_item->name() == "builtin.slice") {
+      phi::Place out_place = place;
+      // Copy op inputs
       std::vector<ir::OpResult> vec_inputs;
-      std::vector<ir::Type> op_output_types;
       if (op_item->num_operands() > 0) {
         for (size_t i = 0; i < op_item->num_operands(); ++i) {
           auto cur_in = op_item->operand_source(i);
@@ -631,18 +630,39 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
 
           if (new_in.type().isa<ir::VectorType>()) {
             auto vec_types = new_in.type().dyn_cast<ir::VectorType>().data();
-            auto index = op_item->attributes()
-                             .at("index")
-                             .dyn_cast<ir::Int32Attribute>()
-                             .data();
-            op_output_types.push_back(vec_types[index]);
+            out_place =
+                vec_types[op_item->attributes()
+                              .at("index")
+                              .dyn_cast<ir::Int32Attribute>()
+                              .data()]
+                    .dyn_cast<dialect::AllocatedDenseTensorType>()
+                    .place();
           } else {
             PADDLE_THROW(
                 phi::errors::Unimplemented("only support vector type for now"));
           }
         }
       }
-
+      // Copy op output type
+      std::vector<ir::Type> op_output_types;
+      if (op_item->num_results() > 0) {
+        for (size_t i = 0; i < op_item->num_results(); ++i) {
+          auto result_type = op_item->result(i).type();
+          if (!result_type) {
+            op_output_types.push_back(result_type);
+          } else if (result_type.isa<dialect::DenseTensorType>()) {
+            auto allocated_dense_tensor_dtype =
+                paddle::dialect::AllocatedDenseTensorType::get(
+                    ctx,
+                    out_place,
+                    result_type.dyn_cast<dialect::DenseTensorType>());
+            op_output_types.push_back(allocated_dense_tensor_dtype);
+          } else {
+            PADDLE_THROW(phi::errors::Unimplemented(
+                "builtin.slice Result type only support DenseTensorType"));
+          }
+        }
+      }
       // Get op info
       ir::OpInfo op_info = ctx->GetRegisteredOpInfo(op_item->name());
       // Generate new op
@@ -664,7 +684,6 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
       std::vector<phi::Place> out_places(op_item->num_results());
       // Copy op inputs
       std::vector<ir::OpResult> vec_inputs;
-      std::vector<ir::Type> op_output_types;
       if (op_item->num_operands() > 0) {
         for (size_t i = 0; i < op_item->num_operands(); ++i) {
           auto cur_in = op_item->operand_source(i);
@@ -684,7 +703,10 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
           if (new_in.type().isa<ir::VectorType>()) {
             auto vec_types = new_in.type().dyn_cast<ir::VectorType>().data();
             for (uint64_t idx = 0; idx < vec_types.size(); idx++) {
-              op_output_types.push_back(vec_types[idx]);
+              out_places[idx] =
+                  vec_types[idx]
+                      .dyn_cast<dialect::AllocatedDenseTensorType>()
+                      .place();
             }
           } else {
             PADDLE_THROW(
@@ -692,7 +714,26 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
           }
         }
       }
-
+      // Copy op output type
+      std::vector<ir::Type> op_output_types;
+      if (op_item->num_results() > 0) {
+        for (size_t i = 0; i < op_item->num_results(); ++i) {
+          auto result_type = op_item->result(i).type();
+          if (!result_type) {
+            op_output_types.push_back(result_type);
+          } else if (result_type.isa<dialect::DenseTensorType>()) {
+            auto allocated_dense_tensor_dtype =
+                paddle::dialect::AllocatedDenseTensorType::get(
+                    ctx,
+                    out_places[i],
+                    result_type.dyn_cast<dialect::DenseTensorType>());
+            op_output_types.push_back(allocated_dense_tensor_dtype);
+          } else {
+            PADDLE_THROW(phi::errors::Unimplemented(
+                "builtin.split Result type only support DenseTensorType"));
+          }
+        }
+      }
       // Get op info
       ir::OpInfo op_info = ctx->GetRegisteredOpInfo(op_item->name());
      // Generate new op
@@ -759,30 +800,36 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
       }
 
       for (size_t i = 0; i < op_item->num_results(); ++i) {
-        phi::Place out_place = phi::TransToPhiPlace(kernel_key.backend());
-
-        phi::DataType out_phi_dtype = phi::DataType::UNDEFINED;
+        phi::Place out_place;
         if ((!UnchangeOutputOps.count(op_item->name())) &&
             (!IsLegacyOp(op_item->name())) && phi_kernel.IsValid()) {
           out_place = phi::TransToPhiPlace(output_defs[i].backend);
-          out_phi_dtype = output_defs[i].dtype;
+        } else {
+          out_place = phi::TransToPhiPlace(kernel_key.backend());
         }
 
         auto result_type = op_item->result(i).type();
         if (!result_type) {
           op_output_types.push_back(result_type);
-        } else if (result_type.isa<dialect::DenseTensorType>() ||
-                   result_type.isa<dialect::SelectedRowsType>()) {
-          op_output_types.push_back(
-              BuildOutputType(result_type, out_place, out_phi_dtype, ctx));
+        } else if (result_type.isa<dialect::DenseTensorType>()) {
+          auto allocated_dense_tensor_dtype =
+              paddle::dialect::AllocatedDenseTensorType::get(
+                  ctx,
+                  out_place,
+                  result_type.dyn_cast<dialect::DenseTensorType>());
+          op_output_types.push_back(allocated_dense_tensor_dtype);
         } else if (result_type.isa<ir::VectorType>()) {
           std::vector<ir::Type> vec_inner_types;
           auto base_types = result_type.dyn_cast<ir::VectorType>().data();
           for (auto& base_type : base_types) {
             if (base_type) {
               if (base_type.isa<dialect::DenseTensorType>()) {
-                vec_inner_types.push_back(
-                    BuildOutputType(base_type, out_place, out_phi_dtype, ctx));
+                auto allocated_dense_tensor_dtype =
+                    paddle::dialect::AllocatedDenseTensorType::get(
+                        ctx,
+                        out_place,
+                        base_type.dyn_cast<dialect::DenseTensorType>());
+                vec_inner_types.push_back(allocated_dense_tensor_dtype);
               } else {
                 PADDLE_THROW(phi::errors::Unimplemented(
                     "only support dense tensor in vector type for now"));
@@ -805,10 +852,16 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
 
           ir::Type t1 = ir::VectorType::get(ctx, vec_inner_types);
           op_output_types.push_back(t1);
+        } else if (result_type.isa<dialect::SelectedRowsType>()) {
+          auto allocated_selected_rows_dtype =
+              paddle::dialect::AllocatedSelectedRowsType::get(
+                  ctx,
+                  out_place,
+                  result_type.dyn_cast<dialect::SelectedRowsType>());
+          op_output_types.emplace_back(allocated_selected_rows_dtype);
         } else {
           PADDLE_THROW(phi::errors::Unimplemented(
-              "Result type only support DenseTensorType, SelectedRowType and "
-              "VectorType"));
+              "Result type only support DenseTensorType and VectorType"));
         }
       }
     }
diff --git a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
index 07c2fc19d1c3c52f67fa15e25acb02d9bd287ef7..0609bf945d9b38343212a51f4aae9635a0e6bd7b 100644
--- a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
@@ -1387,6 +1387,7 @@ PD_REGISTER_KERNEL(batch_norm_grad,
                    phi::dtype::float16) {
   if (kernel_key.dtype() == phi::DataType::FLOAT16 ||
       kernel_key.dtype() == phi::DataType::BFLOAT16) {
+    kernel->OutputAt(0).SetDataType(phi::DataType::FLOAT32);  // x_grad
     kernel->OutputAt(1).SetDataType(phi::DataType::FLOAT32);  // scale_grad
     kernel->OutputAt(2).SetDataType(phi::DataType::FLOAT32);  // bias_grad
   }
@@ -1404,6 +1405,7 @@ PD_REGISTER_KERNEL(batch_norm_grad,
                    double,
                    phi::dtype::float16) {
   if (kernel_key.dtype() == phi::DataType::FLOAT16) {
+    kernel->OutputAt(0).SetDataType(phi::DataType::FLOAT32);  // x_grad
     kernel->OutputAt(1).SetDataType(phi::DataType::FLOAT32);  // scale_grad
     kernel->OutputAt(2).SetDataType(phi::DataType::FLOAT32);  // bias_grad
   }
diff --git a/test/ir/new_ir/test_standalone_new_ir.py b/test/ir/new_ir/test_standalone_new_ir.py
index 037532d427a54dea1f88a1e3feb39cdee66e0a1d..fd5ee67570769494699fc1e2313f590227e8ae61 100644
--- a/test/ir/new_ir/test_standalone_new_ir.py
+++ b/test/ir/new_ir/test_standalone_new_ir.py
@@ -345,25 +345,6 @@ class TestNewIrConcatDygraph(unittest.TestCase):
         np.testing.assert_array_equal(z.numpy(), gold_res)
 
 
-class TestNewIrLogicalDygraph(unittest.TestCase):
-    def test_with_new_ir(self):
-        paddle.disable_static()
-
-        @paddle.jit.to_static
-        def func(x, y, z):
-            a = paddle.logical_and(x, y)
-            return z + a.cast("float32")
-
-        x = paddle.ones([2, 2], dtype='float32')
-        y = paddle.ones([2, 2], dtype='float32')
-        z = paddle.ones([2, 2], dtype='float32')
-
-        z = func(x, y, z)
-
-        gold_res = np.ones([2, 2], dtype="float32") * 2
-        np.testing.assert_array_equal(z.numpy(), gold_res)
-
-
 if __name__ == "__main__":
     paddle.enable_static()
     unittest.main()
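Reviewer notes (illustrative, not part of the patch). First, the place-propagation rule that `PdOpLowerToKernelPass` now applies to `builtin.slice` and `builtin.split`: instead of forwarding input types wholesale, the pass reads the output place from the corresponding input element's `dialect::AllocatedDenseTensorType`. A minimal standalone sketch of that rule; the struct is a simplified stand-in for the allocated tensor type (only the place is modeled) and the function names are hypothetical:

```cpp
#include <cassert>
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Simplified stand-in for dialect::AllocatedDenseTensorType.
struct AllocatedTensorTypeStub {
  std::string place;  // e.g. "cpu" or "gpu:0"
};

// builtin.slice: the single output inherits the place of the element
// selected by the op's "index" attribute.
std::string SliceOutputPlace(const std::vector<AllocatedTensorTypeStub>& ins,
                             std::size_t index) {
  assert(index < ins.size());
  return ins[index].place;
}

// builtin.split: output i inherits the place of input element i.
std::vector<std::string> SplitOutputPlaces(
    const std::vector<AllocatedTensorTypeStub>& ins) {
  std::vector<std::string> places;
  places.reserve(ins.size());
  for (const auto& t : ins) places.push_back(t.place);
  return places;
}

int main() {
  std::vector<AllocatedTensorTypeStub> ins = {{"cpu"}, {"gpu:0"}};
  // Element 1 lives on the GPU, so the sliced output stays on gpu:0
  // regardless of the default backend of the surrounding kernel.
  std::cout << SliceOutputPlace(ins, 1) << "\n";  // prints "gpu:0"
  for (const auto& p : SplitOutputPlaces(ins)) std::cout << p << "\n";
  return 0;
}
```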
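Second, in the generic lowering path the patch makes the `out_place` fallback explicit: the per-output backend from the kernel's output defs is used only when the op is not in `UnchangeOutputOps`, is not a legacy op, and the phi kernel is valid; otherwise the kernel key's backend applies. A standalone sketch of that selection rule, with hypothetical names and strings standing in for `phi::Place`:

```cpp
#include <iostream>
#include <string>

// Sketch of the out_place rule after this patch (not Paddle code).
std::string SelectOutPlace(bool unchange_output_op,
                           bool legacy_op,
                           bool phi_kernel_valid,
                           const std::string& output_def_backend,
                           const std::string& kernel_key_backend) {
  if (!unchange_output_op && !legacy_op && phi_kernel_valid) {
    // Corresponds to phi::TransToPhiPlace(output_defs[i].backend).
    return output_def_backend;
  }
  // Corresponds to phi::TransToPhiPlace(kernel_key.backend()).
  return kernel_key_backend;
}

int main() {
  std::cout << SelectOutPlace(false, false, true, "gpu:0", "cpu") << "\n";  // gpu:0
  std::cout << SelectOutPlace(true, false, true, "gpu:0", "cpu") << "\n";   // cpu
  return 0;
}
```

Note that the `out_phi_dtype` override from `output_defs[i].dtype` is gone entirely, so output dtypes in the lowered IR now come from the result's original IR type.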
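Third, on the `batch_norm_grad` registrations: under an FP16 kernel key (or BF16, for the first registration), all three gradients are now declared FLOAT32 at registration time, where previously only `scale_grad` and `bias_grad` were. A compile-ready sketch of the hook's effect, with stub types standing in for the real phi `Kernel`/`DataType` machinery:

```cpp
#include <iostream>
#include <map>

// Stubs standing in for phi::DataType and the registered phi::Kernel.
enum class DataType { FLOAT16, BFLOAT16, FLOAT32 };

struct KernelStub {
  std::map<int, DataType> output_dtype;  // output index -> declared dtype
};

// Mirrors the registration hook above: for low-precision kernel keys,
// x_grad (output 0) is now pinned to FLOAT32 alongside scale_grad (1)
// and bias_grad (2).
void BatchNormGradHook(DataType kernel_key_dtype, KernelStub* kernel) {
  if (kernel_key_dtype == DataType::FLOAT16 ||
      kernel_key_dtype == DataType::BFLOAT16) {
    kernel->output_dtype[0] = DataType::FLOAT32;  // x_grad
    kernel->output_dtype[1] = DataType::FLOAT32;  // scale_grad
    kernel->output_dtype[2] = DataType::FLOAT32;  // bias_grad
  }
}

int main() {
  KernelStub k;
  BatchNormGradHook(DataType::FLOAT16, &k);
  std::cout << k.output_dtype.size() << " outputs declared FLOAT32\n";  // 3
  return 0;
}
```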