diff --git a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc
index e9248028cde72f7090f7f3d0e198943e9bae1046..458a26a762f416a2aea2336f1778ddb085f6add2 100644
--- a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc
@@ -284,6 +284,27 @@ void ConvBNFusePass::ApplyImpl(ir::Graph* graph) const {
       return;
     }
 
+    // conv_weight fp32 --> fp16
+    auto* conv_weight_tensor =
+        scope->FindVar(conv_weight->Name())->GetMutable<LoDTensor>();
+    auto tensor_type = conv_weight_tensor->dtype();
+
+    if (tensor_type == paddle::experimental::DataType::FLOAT16) {
+      framework::Tensor weight_float_tensor;
+      weight_float_tensor.set_type(paddle::experimental::DataType::FLOAT32);
+      weight_float_tensor.Resize(conv_weight_tensor->dims());
+      auto* weight_float_data =
+          weight_float_tensor.mutable_data<float>(platform::CPUPlace());
+      auto* data =
+          conv_weight_tensor->mutable_data<float16>(platform::CPUPlace());
+      for (int i = 0; i < conv_weight_tensor->numel(); i++) {
+        weight_float_data[i] = static_cast<float>(data[i]);
+      }
+      conv_weight_tensor->clear();
+      paddle::framework::TensorCopySync(
+          weight_float_tensor, platform::CPUPlace(), conv_weight_tensor);
+    }
+
     // Get batch norm bias
     auto* bn_bias_tensor =
         scope->FindVar(bn_bias->Name())->GetMutable<LoDTensor>();
@@ -319,6 +340,43 @@ void ConvBNFusePass::ApplyImpl(ir::Graph* graph) const {
                                epsilon,
                                conv_type());
 
+    if (tensor_type == paddle::experimental::DataType::FLOAT16) {
+      {
+        framework::Tensor weight_float16_tensor;
+        weight_float16_tensor.set_type(paddle::experimental::DataType::FLOAT16);
+        weight_float16_tensor.Resize(conv_weight_tensor->dims());
+        auto* weight_float16_data =
+            weight_float16_tensor.mutable_data<float16>(platform::CPUPlace());
+        auto* data =
+            conv_weight_tensor->mutable_data<float>(platform::CPUPlace());
+        for (int i = 0; i < conv_weight_tensor->numel(); i++) {
+          weight_float16_data[i] = static_cast<float16>(data[i]);
+        }
+        conv_weight_tensor->clear();
+        paddle::framework::TensorCopySync(
+            weight_float16_tensor, platform::CPUPlace(), conv_weight_tensor);
+      }
+
+      {
+        framework::Tensor eltwise_y_in_float16_tensor;
+        eltwise_y_in_float16_tensor.set_type(
+            paddle::experimental::DataType::FLOAT16);
+        eltwise_y_in_float16_tensor.Resize(eltwise_y_in_tensor->dims());
+        auto* eltwise_y_in_float16_data =
+            eltwise_y_in_float16_tensor.mutable_data<float16>(
+                platform::CPUPlace());
+        auto* data =
+            eltwise_y_in_tensor->mutable_data<float>(platform::CPUPlace());
+        for (int i = 0; i < eltwise_y_in_tensor->numel(); i++) {
+          eltwise_y_in_float16_data[i] = static_cast<float16>(data[i]);
+        }
+        eltwise_y_in_tensor->clear();
+        paddle::framework::TensorCopySync(eltwise_y_in_float16_tensor,
+                                          platform::CPUPlace(),
+                                          eltwise_y_in_tensor);
+      }
+    }
+
     // with MKL-DNN fuse conv+bn into conv with bias
     // without MKL-DNN fuse conv+bn into conv+elementwise_add
     if (fuse_option == FUSE_MKLDNN) {
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc
index 3642a28790aeca28f93292182cb00d1cffc9d685..555a5df3713091bb5c02de9b0c38cf255f83ef0a 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.cc
+++ b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -154,9 +154,12 @@ const std::vector<std::string> kLiteSubgraphPasses({
 // support fp16/bf16 precision, temporarily use low precision pass to prevent
 // running errors. After fusion operator supports low precision, delete this.
 const std::vector<std::string> kGpuLowerPrecisionPasses{
-    // "conv_bn_fuse_pass",
-    // "conv_eltwiseadd_bn_fuse_pass",
-};
+    "conv_bn_fuse_pass",
+    "conv_eltwiseadd_bn_fuse_pass",
+    "conv_elementwise_add_act_fuse_pass",
+    "conv_elementwise_add2_act_fuse_pass",
+    "conv_elementwise_add_fuse_pass"};
+
 const std::vector<std::string> kTrtLowerPrecisionPasses{
     // "conv_bn_fuse_pass",
     // "conv_eltwiseadd_bn_fuse_pass",