未验证 提交 65b61db1 编写于 作者: Z Zhaolong Xing 提交者: GitHub

Merge pull request #13927 from NHZlX/fix_googlenet_bug_with_rule

Fix googlenet bug with rule
......@@ -18,6 +18,21 @@ namespace paddle {
namespace inference {
namespace tensorrt {
bool to_skip_merging_optimize(TensorRTEngine* engine_,
const std::vector<int>& filters,
const std::vector<int>& strides,
const std::vector<int>& paddings,
std::string input_name) {
if (engine_->itensor_quote_num[input_name] > 0) {
return true;
}
if (filters[0] == 1 && filters[1] == 1 && strides[0] == 1 &&
strides[1] == 1 && paddings[0] == 0 && paddings[1] == 0)
engine_->itensor_quote_num[input_name] += 1;
return false;
}
class Conv2dOpConverter : public OpConverter {
public:
void operator()(const framework::proto::OpDesc& op,
......@@ -31,6 +46,7 @@ class Conv2dOpConverter : public OpConverter {
PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1);
auto* X = engine_->GetITensor(op_desc.Input("Input").front());
// Declare weights
auto* Y_v = scope.FindVar(op_desc.Input("Filter").front());
PADDLE_ENFORCE_NOT_NULL(Y_v);
......@@ -83,7 +99,10 @@ class Conv2dOpConverter : public OpConverter {
std::move(weight_tensor);
layer->getOutput(0)->setName(output_name.c_str());
engine_->SetITensor(output_name, layer->getOutput(0));
if (test_mode) {
if (test_mode ||
to_skip_merging_optimize(engine_, {filter_h, filter_w}, strides,
paddings, op_desc.Input("Input").front())) {
engine_->DeclareOutput(output_name);
}
}
......
......@@ -133,6 +133,10 @@ void TensorRTEngine::DeclareOutput(const nvinfer1::ILayer *layer, int offset,
buffer_sizes_[name] = 0;
}
bool TensorRTEngine::HasDeclared(const std::string &name) {
return buffer_sizes_.count(name) > 0;
}
void TensorRTEngine::DeclareOutput(const std::string &name) {
PADDLE_ENFORCE_EQ(0, buffer_sizes_.count(name), "duplicate output name %s",
name);
......
......@@ -91,6 +91,8 @@ class TensorRTEngine : public EngineBase {
const std::string& name);
// Set the itensor_map_[name] as the network's output, and set its name.
void DeclareOutput(const std::string& name);
// Check if the ITensor has been declared
bool HasDeclared(const std::string& name);
// GPU memory address for an ITensor with specific name. One can operate on
// these memory directly for acceleration, for example, output the converted
......@@ -132,6 +134,16 @@ class TensorRTEngine : public EngineBase {
std::unordered_map<std::string /*name*/, std::unique_ptr<framework::Tensor>>
weight_map;
// TODO: (NHZLX)
// In the normal case, the paddle-trt exists bug when runing the googlenet.
// When there are more than two convolutions of 1 * 1 with the same input, the
// paddle-tensorrt will do the merging optimization, which fuse those conv
// into
// one conv, and then trigger bug. So, We should use strategy to avoid this
// optimization for the time being. This bug will be fixed in the future.
std::unordered_map<std::string /*name*/, int /*ITensor_quote_num*/>
itensor_quote_num;
private:
// the max batch size
int max_batch_;
......
......@@ -93,11 +93,16 @@ void CompareTensorRTWithFluid(int batch_size, std::string model_dirname) {
}
}
TEST(trt_models_test, main) {
std::vector<std::string> infer_models = {"mobilenet", "resnet50",
"resnext50"};
for (auto &model_dir : infer_models) {
CompareTensorRTWithFluid(1, FLAGS_dirname + "/" + model_dir);
}
TEST(trt_models_test, mobilenet) {
CompareTensorRTWithFluid(1, FLAGS_dirname + "/mobilenet");
}
TEST(trt_models_test, resnet50) {
CompareTensorRTWithFluid(1, FLAGS_dirname + "/resnet50");
}
TEST(trt_models_test, resnext50) {
CompareTensorRTWithFluid(1, FLAGS_dirname + "/resnext50");
}
} // namespace paddle
......@@ -223,7 +223,9 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
// Add outputs
for (auto& output : output_maps) {
engine->DeclareOutput(output);
if (!engine->HasDeclared(output)) {
engine->DeclareOutput(output);
}
}
engine->FreezeNetwork();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册