diff --git a/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc
index 4198098f2bd4fbb65e00f452d043d56b80ae6f55..408c2f092c891afdbcf05828221e06b27feb4cb9 100644
--- a/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc
+++ b/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc
@@ -209,6 +209,46 @@ ir::OpResult AddPlaceTransferOp(ir::OpResult in,
   }
 }
 
+ir::Type BuildOutputType(ir::Type type,
+                         phi::Place place,
+                         phi::DataType data_type,
+                         ir::IrContext* ctx) {
+  if (type.isa<dialect::DenseTensorType>()) {
+    auto dense_tensor_type = type.dyn_cast<dialect::DenseTensorType>();
+    auto out_dtype = dense_tensor_type.dtype();
+    if (data_type != phi::DataType::UNDEFINED) {
+      out_dtype = TransToIrDataType(data_type, ctx);
+    }
+
+    return dialect::AllocatedDenseTensorType::get(
+        ctx,
+        place,
+        out_dtype,
+        dense_tensor_type.dims(),
+        dense_tensor_type.data_layout(),
+        dense_tensor_type.lod(),
+        dense_tensor_type.offset());
+
+  } else if (type.isa<dialect::SelectedRowsType>()) {
+    auto selected_rows_type = type.dyn_cast<dialect::SelectedRowsType>();
+    auto out_dtype = selected_rows_type.dtype();
+    if (data_type != phi::DataType::UNDEFINED) {
+      out_dtype = TransToIrDataType(data_type, ctx);
+    }
+    return dialect::AllocatedSelectedRowsType::get(
+        ctx,
+        place,
+        out_dtype,
+        selected_rows_type.dims(),
+        selected_rows_type.data_layout(),
+        selected_rows_type.lod(),
+        selected_rows_type.offset());
+  } else {
+    PADDLE_THROW(phi::errors::Unimplemented(
+        "BuildOutputType only supports DenseTensorType and SelectedRowsType"));
+  }
+}
+
 phi::DataType GetKernelDataTypeByYamlInfo(
     const ir::Operation* op,
     const std::unordered_map<ir::Value, ir::OpResult>& map_value_pair,
@@ -520,6 +560,7 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
       std::vector<phi::Place> out_places;
       // Copy op inputs
       std::vector<ir::OpResult> vec_inputs;
+      std::vector<ir::Type> vec_inner_types;
       if (op_item->num_operands() > 0) {
         for (size_t i = 0; i < op_item->num_operands(); ++i) {
           auto cur_in = op_item->operand_source(i);
@@ -535,6 +576,7 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
               op_item->name()));
           auto new_in = map_value_pair.at(cur_in);
           vec_inputs.push_back(new_in);
+          vec_inner_types.push_back(new_in.type());
           if (new_in.type().isa<dialect::AllocatedDenseTensorType>()) {
             out_places.push_back(
                 new_in.type()
@@ -548,49 +590,9 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
       }
       // Copy op output type
       std::vector<ir::Type> op_output_types;
-      if (op_item->num_results() > 0) {
-        for (size_t i = 0; i < op_item->num_results(); ++i) {
-          auto result_type = op_item->result(i).type();
-          if (!result_type) {
-            op_output_types.push_back(result_type);
-          } else if (result_type.isa<ir::VectorType>()) {
-            std::vector<ir::Type> vec_inner_types;
-            auto base_types = result_type.dyn_cast<ir::VectorType>().data();
-            for (size_t idx = 0; idx < base_types.size(); idx++) {
-              auto& base_type = base_types[idx];
-              if (base_type) {
-                if (base_type.isa<dialect::DenseTensorType>()) {
-                  auto allocated_dense_tensor_dtype =
-                      paddle::dialect::AllocatedDenseTensorType::get(
-                          ctx,
-                          out_places[idx],
-                          base_type.dyn_cast<dialect::DenseTensorType>());
-                  vec_inner_types.push_back(allocated_dense_tensor_dtype);
-                } else {
-                  PADDLE_THROW(phi::errors::Unimplemented(
-                      "only support dense tensor in vector type for now"));
-                }
-              } else {
-                // NOTE(phlrain), kernel not support a nullptr in output
-                ir::Type fp32_dtype = ir::Float32Type::get(ctx);
-                phi::DDim dims = {};
-                phi::DataLayout data_layout = phi::DataLayout::NCHW;
-                phi::LoD lod = {{}};
-                size_t offset = 0;
-                auto dense_tensor_dtype = paddle::dialect::DenseTensorType::get(
-                    ctx, fp32_dtype, dims, data_layout, lod, offset);
-                vec_inner_types.push_back(dense_tensor_dtype);
-              }
-            }
-            ir::Type t1 = ir::VectorType::get(ctx, vec_inner_types);
-            op_output_types.push_back(t1);
-          } else {
-            PADDLE_THROW(phi::errors::Unimplemented(
-                "builtin.combine Result type only support "
-                "VectorType"));
-          }
-        }
-      }
+      ir::Type t1 = ir::VectorType::get(ctx, vec_inner_types);
+      op_output_types.push_back(t1);
+
       // Get op info
       ir::OpInfo op_info = ctx->GetRegisteredOpInfo(op_item->name());
       // Generate new op
@@ -609,9 +611,8 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
     }
 
     if (op_item->name() == "builtin.slice") {
-      phi::Place out_place = place;
-      // Copy op inputs
       std::vector<ir::OpResult> vec_inputs;
+      std::vector<ir::Type> op_output_types;
       if (op_item->num_operands() > 0) {
         for (size_t i = 0; i < op_item->num_operands(); ++i) {
           auto cur_in = op_item->operand_source(i);
@@ -630,39 +631,18 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
 
           if (new_in.type().isa<ir::VectorType>()) {
             auto vec_types = new_in.type().dyn_cast<ir::VectorType>().data();
-            out_place =
-                vec_types[op_item->attributes()
-                              .at("index")
-                              .dyn_cast<ir::Int32Attribute>()
-                              .data()]
-                    .dyn_cast<dialect::AllocatedDenseTensorType>()
-                    .place();
+            auto index = op_item->attributes()
+                             .at("index")
+                             .dyn_cast<ir::Int32Attribute>()
+                             .data();
+            op_output_types.push_back(vec_types[index]);
           } else {
             PADDLE_THROW(
                 phi::errors::Unimplemented("only support vector type for now"));
           }
         }
       }
-      // Copy op output type
-      std::vector<ir::Type> op_output_types;
-      if (op_item->num_results() > 0) {
-        for (size_t i = 0; i < op_item->num_results(); ++i) {
-          auto result_type = op_item->result(i).type();
-          if (!result_type) {
-            op_output_types.push_back(result_type);
-          } else if (result_type.isa<dialect::DenseTensorType>()) {
-            auto allocated_dense_tensor_dtype =
-                paddle::dialect::AllocatedDenseTensorType::get(
-                    ctx,
-                    out_place,
-                    result_type.dyn_cast<dialect::DenseTensorType>());
-            op_output_types.push_back(allocated_dense_tensor_dtype);
-          } else {
-            PADDLE_THROW(phi::errors::Unimplemented(
-                "builtin.slice Result type only support DenseTensorType"));
-          }
-        }
-      }
+
       // Get op info
       ir::OpInfo op_info = ctx->GetRegisteredOpInfo(op_item->name());
       // Generate new op
@@ -684,6 +664,7 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
       std::vector<phi::Place> out_places(op_item->num_results());
       // Copy op inputs
       std::vector<ir::OpResult> vec_inputs;
+      std::vector<ir::Type> op_output_types;
      if (op_item->num_operands() > 0) {
         for (size_t i = 0; i < op_item->num_operands(); ++i) {
           auto cur_in = op_item->operand_source(i);
@@ -703,10 +684,7 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
           if (new_in.type().isa<ir::VectorType>()) {
             auto vec_types = new_in.type().dyn_cast<ir::VectorType>().data();
             for (uint64_t idx = 0; idx < vec_types.size(); idx++) {
-              out_places[idx] =
-                  vec_types[idx]
-                      .dyn_cast<dialect::AllocatedDenseTensorType>()
-                      .place();
+              op_output_types.push_back(vec_types[idx]);
             }
           } else {
             PADDLE_THROW(
@@ -714,26 +692,7 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
           }
         }
       }
-      // Copy op output type
-      std::vector<ir::Type> op_output_types;
-      if (op_item->num_results() > 0) {
-        for (size_t i = 0; i < op_item->num_results(); ++i) {
-          auto result_type = op_item->result(i).type();
-          if (!result_type) {
-            op_output_types.push_back(result_type);
-          } else if (result_type.isa<dialect::DenseTensorType>()) {
-            auto allocated_dense_tensor_dtype =
-                paddle::dialect::AllocatedDenseTensorType::get(
-                    ctx,
-                    out_places[i],
-                    result_type.dyn_cast<dialect::DenseTensorType>());
-            op_output_types.push_back(allocated_dense_tensor_dtype);
-          } else {
-            PADDLE_THROW(phi::errors::Unimplemented(
-                "builtin.split Result type only support DenseTensorType"));
-          }
-        }
-      }
+
       // Get op info
       ir::OpInfo op_info = ctx->GetRegisteredOpInfo(op_item->name());
       // Generate new op
@@ -800,36 +759,30 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
       }
 
       for (size_t i = 0; i < op_item->num_results(); ++i) {
-        phi::Place out_place;
+        phi::Place out_place = phi::TransToPhiPlace(kernel_key.backend());
+
+        phi::DataType out_phi_dtype = phi::DataType::UNDEFINED;
         if ((!UnchangeOutputOps.count(op_item->name())) &&
             (!IsLegacyOp(op_item->name())) && phi_kernel.IsValid()) {
           out_place = phi::TransToPhiPlace(output_defs[i].backend);
-        } else {
-          out_place = phi::TransToPhiPlace(kernel_key.backend());
+          out_phi_dtype = output_defs[i].dtype;
         }
 
         auto result_type = op_item->result(i).type();
         if (!result_type) {
           op_output_types.push_back(result_type);
-        } else if (result_type.isa<dialect::DenseTensorType>()) {
-          auto allocated_dense_tensor_dtype =
-              paddle::dialect::AllocatedDenseTensorType::get(
-                  ctx,
-                  out_place,
-                  result_type.dyn_cast<dialect::DenseTensorType>());
-          op_output_types.push_back(allocated_dense_tensor_dtype);
+        } else if (result_type.isa<dialect::DenseTensorType>() ||
+                   result_type.isa<dialect::SelectedRowsType>()) {
+          op_output_types.push_back(
+              BuildOutputType(result_type, out_place, out_phi_dtype, ctx));
         } else if (result_type.isa<ir::VectorType>()) {
           std::vector<ir::Type> vec_inner_types;
           auto base_types = result_type.dyn_cast<ir::VectorType>().data();
           for (auto& base_type : base_types) {
             if (base_type) {
               if (base_type.isa<dialect::DenseTensorType>()) {
-                auto allocated_dense_tensor_dtype =
-                    paddle::dialect::AllocatedDenseTensorType::get(
-                        ctx,
-                        out_place,
-                        base_type.dyn_cast<dialect::DenseTensorType>());
-                vec_inner_types.push_back(allocated_dense_tensor_dtype);
+                vec_inner_types.push_back(
+                    BuildOutputType(base_type, out_place, out_phi_dtype, ctx));
              } else {
                PADDLE_THROW(phi::errors::Unimplemented(
                    "only support dense tensor in vector type for now"));
@@ -852,16 +805,10 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
 
           ir::Type t1 = ir::VectorType::get(ctx, vec_inner_types);
           op_output_types.push_back(t1);
-        } else if (result_type.isa<dialect::SelectedRowsType>()) {
-          auto allocated_selected_rows_dtype =
-              paddle::dialect::AllocatedSelectedRowsType::get(
-                  ctx,
-                  out_place,
-                  result_type.dyn_cast<dialect::SelectedRowsType>());
-          op_output_types.emplace_back(allocated_selected_rows_dtype);
         } else {
           PADDLE_THROW(phi::errors::Unimplemented(
-              "Result type only support DenseTensorType and VectorType"));
+              "Result type only supports DenseTensorType, SelectedRowsType and "
+              "VectorType"));
         }
       }
     }
diff --git a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
index 0609bf945d9b38343212a51f4aae9635a0e6bd7b..07c2fc19d1c3c52f67fa15e25acb02d9bd287ef7 100644
--- a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
@@ -1387,7 +1386,6 @@ PD_REGISTER_KERNEL(batch_norm_grad,
                    phi::dtype::float16) {
   if (kernel_key.dtype() == phi::DataType::FLOAT16 ||
       kernel_key.dtype() == phi::DataType::BFLOAT16) {
-    kernel->OutputAt(0).SetDataType(phi::DataType::FLOAT32);  // x_grad
     kernel->OutputAt(1).SetDataType(phi::DataType::FLOAT32);  // scale_grad
     kernel->OutputAt(2).SetDataType(phi::DataType::FLOAT32);  // bias_grad
   }
@@ -1405,7 +1404,6 @@ PD_REGISTER_KERNEL(batch_norm_grad,
                    double,
                    phi::dtype::float16) {
   if (kernel_key.dtype() == phi::DataType::FLOAT16) {
-    kernel->OutputAt(0).SetDataType(phi::DataType::FLOAT32);  // x_grad
     kernel->OutputAt(1).SetDataType(phi::DataType::FLOAT32);  // scale_grad
     kernel->OutputAt(2).SetDataType(phi::DataType::FLOAT32);  // bias_grad
   }
diff --git a/test/ir/new_ir/test_standalone_new_ir.py b/test/ir/new_ir/test_standalone_new_ir.py
index fd5ee67570769494699fc1e2313f590227e8ae61..037532d427a54dea1f88a1e3feb39cdee66e0a1d 100644
--- a/test/ir/new_ir/test_standalone_new_ir.py
+++ b/test/ir/new_ir/test_standalone_new_ir.py
@@ -345,6 +345,25 @@ class TestNewIrConcatDygraph(unittest.TestCase):
         np.testing.assert_array_equal(z.numpy(), gold_res)
 
+class TestNewIrLogicalDygraph(unittest.TestCase): + def test_with_new_ir(self): + paddle.disable_static() + + @paddle.jit.to_static + def func(x, y, z): + a = paddle.logical_and(x, y) + return z + a.cast("float32") + + x = paddle.ones([2, 2], dtype='float32') + y = paddle.ones([2, 2], dtype='float32') + z = paddle.ones([2, 2], dtype='float32') + + z = func(x, y, z) + + gold_res = np.ones([2, 2], dtype="float32") * 2 + np.testing.assert_array_equal(z.numpy(), gold_res) + + if __name__ == "__main__": paddle.enable_static() unittest.main()
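
The core mechanism in this patch is how `BuildOutputType` resolves the output dtype: a result keeps its own dtype unless the kernel's output def declares a concrete one (anything other than `phi::DataType::UNDEFINED`). That is what lets the `batch_norm_grad` registrations stop forcing `x_grad` to FLOAT32 while still promoting `scale_grad`/`bias_grad`, and why the new `logical_and` test (a kernel whose output dtype differs from its inputs) now lowers correctly. Below is a minimal, self-contained sketch of that dispatch pattern; the `Simple*` types are stand-ins invented for illustration, not Paddle's real IR classes:

```cpp
#include <iostream>
#include <stdexcept>
#include <string>
#include <variant>

// Stand-ins for dialect::DenseTensorType / dialect::SelectedRowsType.
struct SimpleDenseTensor { std::string dtype; };
struct SimpleSelectedRows { std::string dtype; };
using SimpleType = std::variant<SimpleDenseTensor, SimpleSelectedRows>;

// Stand-in for the "allocated" result type: a place plus a resolved dtype.
struct SimpleAllocated { std::string place; std::string dtype; };

// Mirrors BuildOutputType: keep the value's own dtype unless the kernel
// declared a concrete output dtype (the empty string plays the role of
// phi::DataType::UNDEFINED here).
SimpleAllocated BuildOutput(const SimpleType& type,
                            const std::string& place,
                            const std::string& kernel_dtype) {
  auto resolve = [&](const std::string& own_dtype) {
    return kernel_dtype.empty() ? own_dtype : kernel_dtype;
  };
  if (auto* dense = std::get_if<SimpleDenseTensor>(&type)) {
    return {place, resolve(dense->dtype)};
  }
  if (auto* rows = std::get_if<SimpleSelectedRows>(&type)) {
    return {place, resolve(rows->dtype)};
  }
  throw std::runtime_error(
      "BuildOutput only supports DenseTensorType and SelectedRowsType");
}

int main() {
  // float16 batch_norm_grad: scale_grad is registered with a FLOAT32 output
  // def, so the kernel dtype overrides; x_grad's def is left UNDEFINED after
  // this patch, so it keeps the tensor's own float16.
  SimpleAllocated scale_grad =
      BuildOutput(SimpleDenseTensor{"float16"}, "GPU", "float32");
  SimpleAllocated x_grad =
      BuildOutput(SimpleDenseTensor{"float16"}, "GPU", /*kernel_dtype=*/"");
  std::cout << scale_grad.dtype << " " << x_grad.dtype << "\n";
  // prints: float32 float16
}
```

Because the SelectedRows branch resolves its dtype the same way, the lowering pass can advertise `AllocatedSelectedRowsType` outputs without duplicating the dense-tensor logic.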