Commit 86b99ac9 authored by nhzlx

fix comments and fix bug

Parent 9d98ca04
@@ -18,7 +18,7 @@ namespace paddle {
 namespace inference {
 namespace tensorrt {
-bool if_skip_merging_optimize(TensorRTEngine* engine_,
+bool to_skip_merging_optimize(TensorRTEngine* engine_,
                               const std::vector<int>& filters,
                               const std::vector<int>& strides,
                               const std::vector<int>& paddings,
@@ -101,7 +101,7 @@ class Conv2dOpConverter : public OpConverter {
     engine_->SetITensor(output_name, layer->getOutput(0));
     if (test_mode ||
-        if_skip_merging_optimize(engine_, {filter_h, filter_w}, strides,
+        to_skip_merging_optimize(engine_, {filter_h, filter_w}, strides,
                                  paddings, op_desc.Input("Input").front())) {
       engine_->DeclareOutput(output_name);
     }
......
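Note the path above: in test mode, or when to_skip_merging_optimize returns true, the conv2d converter itself calls DeclareOutput on its output tensor. The engine kernel (last hunk below) later declares every mapped output again, which appears to be the duplicate-declaration bug this commit guards against with the new HasDeclared check.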
@@ -133,6 +133,10 @@ void TensorRTEngine::DeclareOutput(const nvinfer1::ILayer *layer, int offset,
   buffer_sizes_[name] = 0;
 }
 
+bool TensorRTEngine::HasDeclared(const std::string &name) {
+  return buffer_sizes_.count(name) > 0;
+}
+
 void TensorRTEngine::DeclareOutput(const std::string &name) {
   PADDLE_ENFORCE_EQ(0, buffer_sizes_.count(name), "duplicate output name %s",
                     name);
......
@@ -91,6 +91,8 @@ class TensorRTEngine : public EngineBase {
                      const std::string& name);
   // Set the itensor_map_[name] as the network's output, and set its name.
   void DeclareOutput(const std::string& name);
+  // Check if the ITensor has been declared.
+  bool HasDeclared(const std::string& name);
   // GPU memory address for an ITensor with specific name. One can operate on
   // these memory directly for acceleration, for example, output the converted
......
@@ -96,11 +96,16 @@ void CompareTensorRTWithFluid(int batch_size, std::string model_dirname) {
   }
 }
 
-TEST(trt_models_test, main) {
-  std::vector<std::string> infer_models = {"mobilenet", "resnet50",
-                                           "resnext50"};
-  for (auto &model_dir : infer_models) {
-    CompareTensorRTWithFluid(1, FLAGS_dirname + "/" + model_dir);
-  }
-}
+TEST(trt_models_test, mobilenet) {
+  CompareTensorRTWithFluid(1, FLAGS_dirname + "/mobilenet");
+}
+
+TEST(trt_models_test, resnet50) {
+  CompareTensorRTWithFluid(1, FLAGS_dirname + "/resnet50");
+}
+
+TEST(trt_models_test, resnext50) {
+  CompareTensorRTWithFluid(1, FLAGS_dirname + "/resnext50");
+}
 
 }  // namespace paddle
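Splitting the single TEST into one test per model reports results per model, keeps a failure in one model from cutting short the remaining comparisons, and lets each model be run in isolation via googletest's filter flag, e.g. --gtest_filter=trt_models_test.resnet50.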
@@ -223,7 +223,9 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
     // Add outputs
     for (auto& output : output_maps) {
-      engine->DeclareOutput(output);
+      if (!engine->HasDeclared(output)) {
+        engine->DeclareOutput(output);
+      }
     }
 
     engine->FreezeNetwork();
......
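Taken together, the hunks fix a double-declaration failure: a converter may declare an output early (test mode or the merging-skip path in conv2d), and the engine kernel then declares all mapped outputs a second time, tripping the duplicate-name enforcement in engine.cc. Below is a minimal, self-contained C++ sketch of the fixed pattern; MiniEngine and its members are hypothetical stand-ins for illustration, not the real TensorRTEngine API.

#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <string>
#include <unordered_map>

// Hypothetical stand-in: tracks declared outputs the way the real engine
// uses buffer_sizes_ in the hunks above.
class MiniEngine {
 public:
  void DeclareOutput(const std::string& name) {
    // Mirrors the PADDLE_ENFORCE_EQ duplicate-name check in engine.cc.
    if (buffer_sizes_.count(name) != 0) {
      throw std::runtime_error("duplicate output name " + name);
    }
    buffer_sizes_[name] = 0;
  }
  bool HasDeclared(const std::string& name) {
    return buffer_sizes_.count(name) > 0;
  }

 private:
  std::unordered_map<std::string, std::size_t> buffer_sizes_;
};

int main() {
  MiniEngine engine;
  engine.DeclareOutput("conv_out");  // a converter declared this output early

  // The guarded pattern from the kernel hunk: declare only if not yet declared.
  if (!engine.HasDeclared("conv_out")) {
    engine.DeclareOutput("conv_out");  // without the guard, this would throw
  }
  std::cout << "output declared exactly once\n";
  return 0;
}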