提交 55334007 编写于 作者: N nhzlx

fix comments

上级 0dcbeda2
......@@ -30,6 +30,7 @@ class Pool2dOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr);
// Declare inputs
auto* input1 = engine_->GetITensor(op_desc.Input("X")[0]);
std::string pool_type =
boost::get<std::string>(op_desc.GetAttr("pooling_type"));
std::vector<int> ksize =
......@@ -45,18 +46,18 @@ class Pool2dOpConverter : public OpConverter {
PADDLE_ENFORCE_EQ(input1->getDimensions().nbDims, 3UL);
nvinfer1::PoolingType pool_t = nvinfer1::PoolingType::kMAX;
nvinfer1::PoolingType nv_pool_type = nvinfer1::PoolingType::kMAX;
if (pool_type == "max") {
pool_t = nvinfer1::PoolingType::kMAX;
nv_pool_type = nvinfer1::PoolingType::kMAX;
} else if (pool_type == "avg") {
pool_t = nvinfer1::PoolingType::kAVERAGE;
nv_pool_type = nvinfer1::PoolingType::kAVERAGE;
} else {
PADDLE_THROW("TensorRT unsupported pooling type!");
}
auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Pooling,
*const_cast<nvinfer1::ITensor*>(input1),
pool_t, nv_ksize);
nv_pool_type, nv_ksize);
PADDLE_ENFORCE_NOT_NULL(layer, "pool layer could not be created.");
layer->setStride(nv_strides);
layer->setPadding(nv_paddings);
......
......@@ -23,8 +23,7 @@ namespace tensorrt {
TEST(ReluOpConverter, main) {
framework::Scope scope;
std::unordered_set<std::string> parameters;
int runtime_batch = 3;
TRTConvertValidation validator(10, parameters, scope, 1000, runtime_batch);
TRTConvertValidation validator(10, parameters, scope, 1000);
validator.DeclInputVar("relu-X", nvinfer1::Dims2(10, 6));
validator.DeclOutputVar("relu-Out", nvinfer1::Dims2(10, 6));
......@@ -38,7 +37,7 @@ TEST(ReluOpConverter, main) {
validator.SetOp(*desc.Proto());
LOG(INFO) << "execute";
validator.Execute(runtime_batch);
validator.Execute(5);
}
} // namespace tensorrt
......
......@@ -23,8 +23,7 @@ namespace tensorrt {
TEST(fc_op, test) {
std::unordered_set<std::string> parameters({"mul-Y"});
framework::Scope scope;
int runtime_batch = 2;
TRTConvertValidation validator(10, parameters, scope, 1000, runtime_batch);
TRTConvertValidation validator(10, parameters, scope, 1000);
validator.DeclInputVar("mul-X", nvinfer1::Dims3(10, 1, 1));
validator.DeclParamVar("mul-Y", nvinfer1::Dims2(10, 2));
validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(1, 2));
......@@ -38,7 +37,7 @@ TEST(fc_op, test) {
validator.SetOp(*desc.Proto());
validator.Execute(runtime_batch);
validator.Execute(10);
}
} // namespace tensorrt
......
......@@ -23,8 +23,7 @@ namespace tensorrt {
TEST(MulOpConverter, main) {
framework::Scope scope;
std::unordered_set<std::string> parameters;
int runtime_batch = 0;
TRTConvertValidation validator(10, parameters, scope, 1000, runtime_batch);
TRTConvertValidation validator(10, parameters, scope, 1000, false);
validator.DeclInputVar("mul-X", nvinfer1::Dims2(10, 6));
validator.DeclInputVar("mul-Y", nvinfer1::Dims2(6, 10));
validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(10, 10));
......@@ -40,7 +39,7 @@ TEST(MulOpConverter, main) {
validator.SetOp(*desc.Proto());
LOG(INFO) << "execute";
validator.Execute(1);
validator.Execute(2);
}
} // namespace tensorrt
......
......@@ -23,12 +23,10 @@ namespace tensorrt {
TEST(Pool2dOpConverter, main) {
framework::Scope scope;
std::unordered_set<std::string> parameters;
int runtime_batch = 3;
TRTConvertValidation validator(5, parameters, scope, 1 << 15, runtime_batch);
TRTConvertValidation validator(5, parameters, scope, 1 << 15);
// We have already set the runtime batchsize, so the
// Dims should not contain the batch size.
// The ITensor's Dims of input and output should be C * H * W.
// The ITensor's Dims should not contain the batch size.
// So, the ITensor's Dims of input and output should be C * H * W.
validator.DeclInputVar("pool2d-X", nvinfer1::Dims3(3, 4, 4));
validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims3(3, 2, 2));
......@@ -52,7 +50,7 @@ TEST(Pool2dOpConverter, main) {
validator.SetOp(*desc.Proto());
LOG(INFO) << "execute";
validator.Execute(runtime_batch);
validator.Execute(3);
}
} // namespace tensorrt
......
......@@ -66,10 +66,11 @@ class TRTConvertValidation {
TRTConvertValidation(int max_batch_size,
const std::unordered_set<std::string>& parameters,
framework::Scope& scope, // NOLINT
int workspace_size = 1 << 10, int runtime_batch_size = 1)
int workspace_size = 1 << 10, bool if_add_batch = true)
: parameters_(parameters),
scope_(scope),
runtime_batch_size_(runtime_batch_size) {
if_add_batch_(if_add_batch),
max_batch_size_(max_batch_size) {
// create engine.
engine_.reset(new TensorRTEngine(max_batch_size, workspace_size, &stream_));
engine_->InitNetwork();
......@@ -102,12 +103,10 @@ class TRTConvertValidation {
// Init Fluid tensor.
std::vector<int> dim_vec(dims.d, dims.d + dims.nbDims);
// There is no batch size in ITensor's shape, but we should add it to
// tensor's
// shape of fluid. If the variable is not parameter and the batch size
// greater than 0,
// add the batchsize to dim_vec.
if (is_param != true && runtime_batch_size_ > 0)
dim_vec.insert(dim_vec.begin(), runtime_batch_size_);
// tensor's shape of fluid. If the variable is not parameter and the
// if_add_batch_ flag is true, add the max batchsize to dim_vec.
if (is_param != true && if_add_batch_ == true)
dim_vec.insert(dim_vec.begin(), max_batch_size_);
auto* x = scope_.Var(name);
auto* x_tensor = x->GetMutable<framework::LoDTensor>();
x_tensor->Resize(framework::make_ddim(dim_vec));
......@@ -141,6 +140,7 @@ class TRTConvertValidation {
void Execute(int batch_size) {
// Execute Fluid Op
PADDLE_ENFORCE_LE(batch_size, max_batch_size_);
platform::CPUPlace place;
platform::CPUDeviceContext ctx(place);
op_->Run(scope_, place);
......@@ -159,9 +159,14 @@ class TRTConvertValidation {
auto* var = scope_.FindVar(output);
auto tensor = var->GetMutable<framework::LoDTensor>();
framework::TensorToVector(*tensor, ctx, &fluid_out);
size_t fluid_out_size = fluid_out.size();
if (if_add_batch_ == true) {
fluid_out_size = batch_size * (tensor->dims().size() / max_batch_size_);
}
// Compare two output
ASSERT_FALSE(fluid_out.empty());
for (size_t i = 0; i < fluid_out.size(); i++) {
for (size_t i = 0; i < fluid_out_size; i++) {
// Loosen the threshold for CI on different machine models.
EXPECT_LT(std::abs(fluid_out[i] - trt_out[i]), 2e-5);
}
......@@ -177,10 +182,12 @@ class TRTConvertValidation {
std::unique_ptr<framework::OpDesc> op_desc_;
const std::unordered_set<std::string>& parameters_;
framework::Scope& scope_;
// It represents the runtime batchsize when we test.
// If the value greater than 0, we add this to
// the first dimension of tensor's shape of fluid.
int runtime_batch_size_;
// The ITensor of trt does not contain the batch size,
// but, in most cases, we need to set batch size for
// fluid's tensor shape. This variable indicates
// whether to add batch size to tensor shape of fluid.
bool if_add_batch_;
int max_batch_size_;
};
} // namespace tensorrt
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册