From 940f5dbcac12babd15e097ec2c30291cc805acac Mon Sep 17 00:00:00 2001 From: nhzlx Date: Mon, 23 Jul 2018 11:44:37 +0000 Subject: [PATCH] modify the tensorrt engine op to adapt to change --- paddle/fluid/operators/tensorrt_engine_op.cc | 7 ++-- paddle/fluid/operators/tensorrt_engine_op.h | 8 ++--- .../operators/tensorrt_engine_op_test.cc | 32 +++++++++---------- 3 files changed, 23 insertions(+), 24 deletions(-) diff --git a/paddle/fluid/operators/tensorrt_engine_op.cc b/paddle/fluid/operators/tensorrt_engine_op.cc index 647cfc0a0af..048f4d8ce81 100644 --- a/paddle/fluid/operators/tensorrt_engine_op.cc +++ b/paddle/fluid/operators/tensorrt_engine_op.cc @@ -53,13 +53,14 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector &shape) { PADDLE_ENFORCE_LE(shape.size(), 4UL, "TensorRT' tensor input requires at most 4 dimensions"); + // We should delete the batch size here. switch (shape.size()) { case 2: - return nvinfer1::Dims2(shape[0], shape[1]); + return nvinfer1::Dims2(1, shape[1]); case 3: - return nvinfer1::Dims3(shape[0], shape[1], shape[2]); + return nvinfer1::Dims3(1, shape[1], shape[2]); case 4: - return nvinfer1::Dims4(shape[0], shape[1], shape[2], shape[3]); + return nvinfer1::Dims4(1, shape[1], shape[2], shape[3]); default: return nvinfer1::Dims(); } diff --git a/paddle/fluid/operators/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt_engine_op.h index 1602a913aee..203fb5f2ba9 100644 --- a/paddle/fluid/operators/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt_engine_op.h @@ -95,16 +95,14 @@ class TensorRTEngineKernel : public framework::OpKernel { PADDLE_ENFORCE_NOT_NULL(fluid_v, "no output variable called %s", y); auto* fluid_t = fluid_v->GetMutable(); fluid_t->Resize(framework::make_ddim(ddim)); - auto size = inference::analysis::AccuDims(dims.d, dims.nbDims); + if (platform::is_cpu_place(fluid_t->place())) { // TODO(Superjomn) change this float to dtype size. 
engine->GetOutputInCPU( - y, fluid_t->mutable_data(platform::CPUPlace()), - size * sizeof(float)); + y, fluid_t->mutable_data(platform::CPUPlace())); } else { engine->GetOutputInGPU( - y, fluid_t->mutable_data(platform::CUDAPlace()), - size * sizeof(float)); + y, fluid_t->mutable_data(platform::CUDAPlace())); } } diff --git a/paddle/fluid/operators/tensorrt_engine_op_test.cc b/paddle/fluid/operators/tensorrt_engine_op_test.cc index 82a16361e40..9b46fbb72b1 100644 --- a/paddle/fluid/operators/tensorrt_engine_op_test.cc +++ b/paddle/fluid/operators/tensorrt_engine_op_test.cc @@ -64,36 +64,37 @@ TEST(TensorRTEngineOp, manual) { LOG(INFO) << "create block desc"; framework::BlockDesc block_desc(&program, block_); - LOG(INFO) << "create mul op"; - auto* mul = block_desc.AppendOp(); - mul->SetType("mul"); - mul->SetInput("X", std::vector({"x"})); // 2 x 4 - mul->SetInput("Y", std::vector({"y"})); // 4 x 6 - mul->SetOutput("Out", std::vector({"z"})); // 2 x 6 + LOG(INFO) << "create fc op"; + auto* fc0 = block_desc.AppendOp(); + fc0->SetType("mul"); + fc0->SetInput("X", std::vector({"x"})); // 4 x 1 x 1 + fc0->SetInput("Y", std::vector({"y"})); // 4 x 6 + fc0->SetOutput("Out", std::vector({"z"})); // 6 x 1 x 1 LOG(INFO) << "create fc op"; - auto* fc = block_desc.AppendOp(); - fc->SetType("mul"); - fc->SetInput("X", std::vector({"z"})); - fc->SetInput("Y", std::vector({"y0"})); // 6 x 8 - fc->SetOutput("Out", std::vector({"z0"})); // 2 x 8 + auto* fc1 = block_desc.AppendOp(); + fc1->SetType("mul"); + fc1->SetInput("X", std::vector({"z"})); + fc1->SetInput("Y", std::vector({"y0"})); // 6 x 8 + fc1->SetOutput("Out", std::vector({"z0"})); // 8 x 1 x 1 // Set inputs' variable shape in BlockDesc - AddTensorToBlockDesc(block_, "x", std::vector({2, 4})); + // the batch size is 2, so the dims of 'x' is {2, 4, 1, 1} + AddTensorToBlockDesc(block_, "x", std::vector({2, 4, 1, 1})); AddTensorToBlockDesc(block_, "y", std::vector({4, 6})); AddTensorToBlockDesc(block_, "y0", std::vector({6, 
8})); AddTensorToBlockDesc(block_, "z", std::vector({2, 6})); // It is wired, need to copy manually. - *block_->add_ops() = *mul->Proto(); - *block_->add_ops() = *fc->Proto(); + *block_->add_ops() = *fc0->Proto(); + *block_->add_ops() = *fc1->Proto(); ASSERT_EQ(block_->ops_size(), 2); LOG(INFO) << "create tensorrt desc"; framework::OpDesc engine_op_desc(nullptr); engine_op_desc.SetType("tensorrt_engine"); - engine_op_desc.SetInput("Xs", std::vector({"x", "y", "y0"})); + engine_op_desc.SetInput("Xs", std::vector({"x"})); engine_op_desc.SetOutput("Ys", std::vector({"z0"})); SetAttr(engine_op_desc.Proto(), "subgraph", block_->SerializeAsString()); @@ -208,4 +209,3 @@ TEST(TensorRTEngineOp, fc) { Execute(40, 28, 28); } } // namespace paddle USE_TRT_CONVERTER(mul) -USE_TRT_CONVERTER(fc) -- GitLab