提交 55334007 编写于 作者: N nhzlx

fix comments

上级 0dcbeda2
...@@ -30,6 +30,7 @@ class Pool2dOpConverter : public OpConverter { ...@@ -30,6 +30,7 @@ class Pool2dOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr); framework::OpDesc op_desc(op, nullptr);
// Declare inputs // Declare inputs
auto* input1 = engine_->GetITensor(op_desc.Input("X")[0]); auto* input1 = engine_->GetITensor(op_desc.Input("X")[0]);
std::string pool_type = std::string pool_type =
boost::get<std::string>(op_desc.GetAttr("pooling_type")); boost::get<std::string>(op_desc.GetAttr("pooling_type"));
std::vector<int> ksize = std::vector<int> ksize =
...@@ -45,18 +46,18 @@ class Pool2dOpConverter : public OpConverter { ...@@ -45,18 +46,18 @@ class Pool2dOpConverter : public OpConverter {
PADDLE_ENFORCE_EQ(input1->getDimensions().nbDims, 3UL); PADDLE_ENFORCE_EQ(input1->getDimensions().nbDims, 3UL);
nvinfer1::PoolingType pool_t = nvinfer1::PoolingType::kMAX; nvinfer1::PoolingType nv_pool_type = nvinfer1::PoolingType::kMAX;
if (pool_type == "max") { if (pool_type == "max") {
pool_t = nvinfer1::PoolingType::kMAX; nv_pool_type = nvinfer1::PoolingType::kMAX;
} else if (pool_type == "avg") { } else if (pool_type == "avg") {
pool_t = nvinfer1::PoolingType::kAVERAGE; nv_pool_type = nvinfer1::PoolingType::kAVERAGE;
} else { } else {
PADDLE_THROW("TensorRT unsupported pooling type!"); PADDLE_THROW("TensorRT unsupported pooling type!");
} }
auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Pooling, auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Pooling,
*const_cast<nvinfer1::ITensor*>(input1), *const_cast<nvinfer1::ITensor*>(input1),
pool_t, nv_ksize); nv_pool_type, nv_ksize);
PADDLE_ENFORCE_NOT_NULL(layer, "pool layer could not be created."); PADDLE_ENFORCE_NOT_NULL(layer, "pool layer could not be created.");
layer->setStride(nv_strides); layer->setStride(nv_strides);
layer->setPadding(nv_paddings); layer->setPadding(nv_paddings);
......
...@@ -23,8 +23,7 @@ namespace tensorrt { ...@@ -23,8 +23,7 @@ namespace tensorrt {
TEST(ReluOpConverter, main) { TEST(ReluOpConverter, main) {
framework::Scope scope; framework::Scope scope;
std::unordered_set<std::string> parameters; std::unordered_set<std::string> parameters;
int runtime_batch = 3; TRTConvertValidation validator(10, parameters, scope, 1000);
TRTConvertValidation validator(10, parameters, scope, 1000, runtime_batch);
validator.DeclInputVar("relu-X", nvinfer1::Dims2(10, 6)); validator.DeclInputVar("relu-X", nvinfer1::Dims2(10, 6));
validator.DeclOutputVar("relu-Out", nvinfer1::Dims2(10, 6)); validator.DeclOutputVar("relu-Out", nvinfer1::Dims2(10, 6));
...@@ -38,7 +37,7 @@ TEST(ReluOpConverter, main) { ...@@ -38,7 +37,7 @@ TEST(ReluOpConverter, main) {
validator.SetOp(*desc.Proto()); validator.SetOp(*desc.Proto());
LOG(INFO) << "execute"; LOG(INFO) << "execute";
validator.Execute(runtime_batch); validator.Execute(5);
} }
} // namespace tensorrt } // namespace tensorrt
......
...@@ -23,8 +23,7 @@ namespace tensorrt { ...@@ -23,8 +23,7 @@ namespace tensorrt {
TEST(fc_op, test) { TEST(fc_op, test) {
std::unordered_set<std::string> parameters({"mul-Y"}); std::unordered_set<std::string> parameters({"mul-Y"});
framework::Scope scope; framework::Scope scope;
int runtime_batch = 2; TRTConvertValidation validator(10, parameters, scope, 1000);
TRTConvertValidation validator(10, parameters, scope, 1000, runtime_batch);
validator.DeclInputVar("mul-X", nvinfer1::Dims3(10, 1, 1)); validator.DeclInputVar("mul-X", nvinfer1::Dims3(10, 1, 1));
validator.DeclParamVar("mul-Y", nvinfer1::Dims2(10, 2)); validator.DeclParamVar("mul-Y", nvinfer1::Dims2(10, 2));
validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(1, 2)); validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(1, 2));
...@@ -38,7 +37,7 @@ TEST(fc_op, test) { ...@@ -38,7 +37,7 @@ TEST(fc_op, test) {
validator.SetOp(*desc.Proto()); validator.SetOp(*desc.Proto());
validator.Execute(runtime_batch); validator.Execute(10);
} }
} // namespace tensorrt } // namespace tensorrt
......
...@@ -23,8 +23,7 @@ namespace tensorrt { ...@@ -23,8 +23,7 @@ namespace tensorrt {
TEST(MulOpConverter, main) { TEST(MulOpConverter, main) {
framework::Scope scope; framework::Scope scope;
std::unordered_set<std::string> parameters; std::unordered_set<std::string> parameters;
int runtime_batch = 0; TRTConvertValidation validator(10, parameters, scope, 1000, false);
TRTConvertValidation validator(10, parameters, scope, 1000, runtime_batch);
validator.DeclInputVar("mul-X", nvinfer1::Dims2(10, 6)); validator.DeclInputVar("mul-X", nvinfer1::Dims2(10, 6));
validator.DeclInputVar("mul-Y", nvinfer1::Dims2(6, 10)); validator.DeclInputVar("mul-Y", nvinfer1::Dims2(6, 10));
validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(10, 10)); validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(10, 10));
...@@ -40,7 +39,7 @@ TEST(MulOpConverter, main) { ...@@ -40,7 +39,7 @@ TEST(MulOpConverter, main) {
validator.SetOp(*desc.Proto()); validator.SetOp(*desc.Proto());
LOG(INFO) << "execute"; LOG(INFO) << "execute";
validator.Execute(1); validator.Execute(2);
} }
} // namespace tensorrt } // namespace tensorrt
......
...@@ -23,12 +23,10 @@ namespace tensorrt { ...@@ -23,12 +23,10 @@ namespace tensorrt {
TEST(Pool2dOpConverter, main) { TEST(Pool2dOpConverter, main) {
framework::Scope scope; framework::Scope scope;
std::unordered_set<std::string> parameters; std::unordered_set<std::string> parameters;
int runtime_batch = 3; TRTConvertValidation validator(5, parameters, scope, 1 << 15);
TRTConvertValidation validator(5, parameters, scope, 1 << 15, runtime_batch);
// We have already set the runtime batchsize, so the // The ITensor's Dims should not contain the batch size.
// Dims should not contain the batch size. // So, the ITensor's Dims of input and output should be C * H * W.
// The ITensor's Dims of input and output should be C * H * W.
validator.DeclInputVar("pool2d-X", nvinfer1::Dims3(3, 4, 4)); validator.DeclInputVar("pool2d-X", nvinfer1::Dims3(3, 4, 4));
validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims3(3, 2, 2)); validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims3(3, 2, 2));
...@@ -52,7 +50,7 @@ TEST(Pool2dOpConverter, main) { ...@@ -52,7 +50,7 @@ TEST(Pool2dOpConverter, main) {
validator.SetOp(*desc.Proto()); validator.SetOp(*desc.Proto());
LOG(INFO) << "execute"; LOG(INFO) << "execute";
validator.Execute(runtime_batch); validator.Execute(3);
} }
} // namespace tensorrt } // namespace tensorrt
......
...@@ -66,10 +66,11 @@ class TRTConvertValidation { ...@@ -66,10 +66,11 @@ class TRTConvertValidation {
TRTConvertValidation(int max_batch_size, TRTConvertValidation(int max_batch_size,
const std::unordered_set<std::string>& parameters, const std::unordered_set<std::string>& parameters,
framework::Scope& scope, // NOLINT framework::Scope& scope, // NOLINT
int workspace_size = 1 << 10, int runtime_batch_size = 1) int workspace_size = 1 << 10, bool if_add_batch = true)
: parameters_(parameters), : parameters_(parameters),
scope_(scope), scope_(scope),
runtime_batch_size_(runtime_batch_size) { if_add_batch_(if_add_batch),
max_batch_size_(max_batch_size) {
// create engine. // create engine.
engine_.reset(new TensorRTEngine(max_batch_size, workspace_size, &stream_)); engine_.reset(new TensorRTEngine(max_batch_size, workspace_size, &stream_));
engine_->InitNetwork(); engine_->InitNetwork();
...@@ -102,12 +103,10 @@ class TRTConvertValidation { ...@@ -102,12 +103,10 @@ class TRTConvertValidation {
// Init Fluid tensor. // Init Fluid tensor.
std::vector<int> dim_vec(dims.d, dims.d + dims.nbDims); std::vector<int> dim_vec(dims.d, dims.d + dims.nbDims);
// There is no batchsize in ITensor's shape, but we should add it to // There is no batchsize in ITensor's shape, but we should add it to
// tensor's // tensor's shape of fluid. If the variable is not parameter and the
// shape of fluid. If the variable is not parameter and the batch size // if_add_batch_ flag is true, add the max batchsize to dim_vec.
// greater than 0, if (is_param != true && if_add_batch_ == true)
// add the batchsize to dim_vec. dim_vec.insert(dim_vec.begin(), max_batch_size_);
if (is_param != true && runtime_batch_size_ > 0)
dim_vec.insert(dim_vec.begin(), runtime_batch_size_);
auto* x = scope_.Var(name); auto* x = scope_.Var(name);
auto* x_tensor = x->GetMutable<framework::LoDTensor>(); auto* x_tensor = x->GetMutable<framework::LoDTensor>();
x_tensor->Resize(framework::make_ddim(dim_vec)); x_tensor->Resize(framework::make_ddim(dim_vec));
...@@ -141,6 +140,7 @@ class TRTConvertValidation { ...@@ -141,6 +140,7 @@ class TRTConvertValidation {
void Execute(int batch_size) { void Execute(int batch_size) {
// Execute Fluid Op // Execute Fluid Op
PADDLE_ENFORCE_LE(batch_size, max_batch_size_);
platform::CPUPlace place; platform::CPUPlace place;
platform::CPUDeviceContext ctx(place); platform::CPUDeviceContext ctx(place);
op_->Run(scope_, place); op_->Run(scope_, place);
...@@ -159,9 +159,14 @@ class TRTConvertValidation { ...@@ -159,9 +159,14 @@ class TRTConvertValidation {
auto* var = scope_.FindVar(output); auto* var = scope_.FindVar(output);
auto tensor = var->GetMutable<framework::LoDTensor>(); auto tensor = var->GetMutable<framework::LoDTensor>();
framework::TensorToVector(*tensor, ctx, &fluid_out); framework::TensorToVector(*tensor, ctx, &fluid_out);
size_t fluid_out_size = fluid_out.size();
if (if_add_batch_ == true) {
fluid_out_size = batch_size * (tensor->dims().size() / max_batch_size_);
}
// Compare two output // Compare two output
ASSERT_FALSE(fluid_out.empty()); ASSERT_FALSE(fluid_out.empty());
for (size_t i = 0; i < fluid_out.size(); i++) { for (size_t i = 0; i < fluid_out_size; i++) {
// Loosen the threshold for CI on different machine models. // Loosen the threshold for CI on different machine models.
EXPECT_LT(std::abs(fluid_out[i] - trt_out[i]), 2e-5); EXPECT_LT(std::abs(fluid_out[i] - trt_out[i]), 2e-5);
} }
...@@ -177,10 +182,12 @@ class TRTConvertValidation { ...@@ -177,10 +182,12 @@ class TRTConvertValidation {
std::unique_ptr<framework::OpDesc> op_desc_; std::unique_ptr<framework::OpDesc> op_desc_;
const std::unordered_set<std::string>& parameters_; const std::unordered_set<std::string>& parameters_;
framework::Scope& scope_; framework::Scope& scope_;
// It represents the runtime batchsize when we test. // The ITensor of trt does not contain the batch size,
// If the value greater than 0, we add this to // but, in most cases, we need to set batch size for
// the first dimension of tensor's shape of fluid. // fluid's tensor shape. This variable indicates
int runtime_batch_size_; // whether to add batch size to tensor shape of fluid.
bool if_add_batch_;
int max_batch_size_;
}; };
} // namespace tensorrt } // namespace tensorrt
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册