diff --git a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc index 0a37d3968c39d2c244bbd82161afddf6330e421d..c8fc0bedfd3dc6a548187adc6393b86fccfeb5fb 100644 --- a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc @@ -18,6 +18,21 @@ namespace paddle { namespace inference { namespace tensorrt { +bool if_skip_merging_optimize(TensorRTEngine* engine_, + const std::vector& filters, + const std::vector& strides, + const std::vector& paddings, + std::string input_name) { + if (engine_->itensor_quote_num[input_name] > 0) { + return true; + } + if (filters[0] == 1 && filters[1] == 1 && strides[0] == 1 && + strides[1] == 1 && paddings[0] == 0 && paddings[1] == 0) + engine_->itensor_quote_num[input_name] += 1; + + return false; +} + class Conv2dOpConverter : public OpConverter { public: void operator()(const framework::proto::OpDesc& op, @@ -31,6 +46,7 @@ class Conv2dOpConverter : public OpConverter { PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1); auto* X = engine_->GetITensor(op_desc.Input("Input").front()); + // Declare weights auto* Y_v = scope.FindVar(op_desc.Input("Filter").front()); PADDLE_ENFORCE_NOT_NULL(Y_v); @@ -83,7 +99,10 @@ class Conv2dOpConverter : public OpConverter { std::move(weight_tensor); layer->getOutput(0)->setName(output_name.c_str()); engine_->SetITensor(output_name, layer->getOutput(0)); - if (test_mode) { + + if (test_mode || + if_skip_merging_optimize(engine_, {filter_h, filter_w}, strides, + paddings, op_desc.Input("Input").front())) { engine_->DeclareOutput(output_name); } } diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h index bd3ba4cea6551a7f6651e311e2649de191a6faa1..e828d2077d7ec9dbeae9e75202f0aa1c26b35b60 100644 --- a/paddle/fluid/inference/tensorrt/engine.h +++ b/paddle/fluid/inference/tensorrt/engine.h @@ -132,6 +132,16 @@ class TensorRTEngine : public EngineBase { std::unordered_map> weight_map; + // TODO: (NHZLX) + // In the normal case, the paddle-trt exists bug when runing the googlenet. + // When there are more than two convolutions of 1 * 1 with the same input, the + // paddle-tensorrt will do the merging optimization, which fuse those conv + // into + // one conv, and then trigger bug. So, We should use strategy to avoid this + // optimization for the time being. This bug will be fixed in the future. + std::unordered_map + itensor_quote_num; + private: // the max batch size int max_batch_;