提交 01566fb6 编写于 作者: N nhzlx

1. support mutil batch utest 2. support pool op

上级 21890ca0
......@@ -43,6 +43,8 @@ class Pool2dOpConverter : public OpConverter {
const nvinfer1::DimsHW nv_strides(strides[0], strides[1]);
const nvinfer1::DimsHW nv_paddings(paddings[0], paddings[1]);
PADDLE_ENFORCE_EQ(input1->getDimensions().nbDims, 3UL);
nvinfer1::PoolingType pool_t = nvinfer1::PoolingType::kMAX;
if (pool_type == "max") {
pool_t = nvinfer1::PoolingType::kMAX;
......
......@@ -23,7 +23,8 @@ namespace tensorrt {
TEST(ReluOpConverter, main) {
framework::Scope scope;
std::unordered_set<std::string> parameters;
TRTConvertValidation validator(10, parameters, scope, 1000);
int runtime_batch = 3;
TRTConvertValidation validator(10, parameters, scope, 1000, runtime_batch);
validator.DeclInputVar("relu-X", nvinfer1::Dims2(10, 6));
validator.DeclOutputVar("relu-Out", nvinfer1::Dims2(10, 6));
......@@ -37,7 +38,7 @@ TEST(ReluOpConverter, main) {
validator.SetOp(*desc.Proto());
LOG(INFO) << "execute";
validator.Execute(1);
validator.Execute(runtime_batch);
}
} // namespace tensorrt
......
......@@ -23,10 +23,10 @@ namespace tensorrt {
TEST(fc_op, test) {
std::unordered_set<std::string> parameters({"mul-Y"});
framework::Scope scope;
TRTConvertValidation validator(10, parameters, scope, 1000);
validator.DeclInputVar("mul-X", nvinfer1::Dims4(1, 10, 1, 1));
int runtime_batch = 2;
TRTConvertValidation validator(10, parameters, scope, 1000, runtime_batch);
validator.DeclInputVar("mul-X", nvinfer1::Dims3(10, 1, 1));
validator.DeclParamVar("mul-Y", nvinfer1::Dims2(10, 2));
// validator.DeclParamVar("mul-Y", nvinfer1::Dims2(8, 2));
validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(1, 2));
// Prepare Op description
......@@ -38,7 +38,7 @@ TEST(fc_op, test) {
validator.SetOp(*desc.Proto());
validator.Execute(1);
validator.Execute(runtime_batch);
}
} // namespace tensorrt
......
......@@ -23,7 +23,8 @@ namespace tensorrt {
TEST(MulOpConverter, main) {
framework::Scope scope;
std::unordered_set<std::string> parameters;
TRTConvertValidation validator(10, parameters, scope, 1000);
int runtime_batch = 0;
TRTConvertValidation validator(10, parameters, scope, 1000, runtime_batch);
validator.DeclInputVar("mul-X", nvinfer1::Dims2(10, 6));
validator.DeclInputVar("mul-Y", nvinfer1::Dims2(6, 10));
validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(10, 10));
......
......@@ -23,9 +23,14 @@ namespace tensorrt {
TEST(Pool2dOpConverter, main) {
framework::Scope scope;
std::unordered_set<std::string> parameters;
TRTConvertValidation validator(10, parameters, scope, 1000);
validator.DeclInputVar("pool2d-X", nvinfer1::Dims4(10, 3, 2, 2));
validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims4(10, 3, 1, 1));
int runtime_batch = 3;
TRTConvertValidation validator(5, parameters, scope, 1 << 15, runtime_batch);
// We have already set the runtime batchsize, so the
// Dims should not contain the batch size.
// The ITensor's Dims of input and output should be C * H * W.
validator.DeclInputVar("pool2d-X", nvinfer1::Dims3(3, 4, 4));
validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims3(3, 2, 2));
// Prepare Op description
framework::OpDesc desc;
......@@ -34,7 +39,7 @@ TEST(Pool2dOpConverter, main) {
desc.SetOutput("Out", {"pool2d-Out"});
std::vector<int> ksize({2, 2});
std::vector<int> strides({1, 1});
std::vector<int> strides({2, 2});
std::vector<int> paddings({0, 0});
std::string pooling_t = "max";
......@@ -42,18 +47,12 @@ TEST(Pool2dOpConverter, main) {
desc.SetAttr("ksize", ksize);
desc.SetAttr("strides", strides);
desc.SetAttr("paddings", paddings);
// std::string temp = "";
// (*desc.Proto()).SerializeToString(&temp);
// std::cout << temp << std::endl;
// std::ofstream f("__temp__", std::ios::out);
// f << temp;
LOG(INFO) << "set OP";
validator.SetOp(*desc.Proto());
LOG(INFO) << "execute";
validator.Execute(10);
validator.Execute(runtime_batch);
}
} // namespace tensorrt
......
......@@ -63,13 +63,15 @@ class TRTConvertValidation {
public:
TRTConvertValidation() = delete;
TRTConvertValidation(int batch_size,
TRTConvertValidation(int max_batch_size,
const std::unordered_set<std::string>& parameters,
framework::Scope& scope, // NOLINT
int workspace_size = 1 << 10)
: parameters_(parameters), scope_(scope) {
int workspace_size = 1 << 10, int runtime_batch_size = 1)
: parameters_(parameters),
scope_(scope),
runtime_batch_size_(runtime_batch_size) {
// create engine.
engine_.reset(new TensorRTEngine(batch_size, workspace_size, &stream_));
engine_.reset(new TensorRTEngine(max_batch_size, workspace_size, &stream_));
engine_->InitNetwork();
PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0);
......@@ -84,7 +86,7 @@ class TRTConvertValidation {
// Declare a parameter varaible in the scope.
void DeclParamVar(const std::string& name, const nvinfer1::Dims& dims) {
DeclVar(name, dims);
DeclVar(name, dims, true);
}
void DeclOutputVar(const std::string& name, const nvinfer1::Dims& dims) {
......@@ -92,12 +94,20 @@ class TRTConvertValidation {
}
// Declare a variable in a fluid Scope.
void DeclVar(const std::string& name, const nvinfer1::Dims& dims) {
void DeclVar(const std::string& name, const nvinfer1::Dims& dims,
bool is_param = false) {
platform::CPUPlace place;
platform::CPUDeviceContext ctx(place);
// Init Fluid tensor.
std::vector<int> dim_vec(dims.d, dims.d + dims.nbDims);
// There is no batchsize in ITensor's shape, but We should add it to
// tensor's
// shape of fluid. If the variable is not parameter and the batch size
// greater than 0,
// add the batchsize to dim_vec.
if (is_param != true && runtime_batch_size_ > 0)
dim_vec.insert(dim_vec.begin(), runtime_batch_size_);
auto* x = scope_.Var(name);
auto* x_tensor = x->GetMutable<framework::LoDTensor>();
x_tensor->Resize(framework::make_ddim(dim_vec));
......@@ -167,6 +177,10 @@ class TRTConvertValidation {
std::unique_ptr<framework::OpDesc> op_desc_;
const std::unordered_set<std::string>& parameters_;
framework::Scope& scope_;
// It represents the runtime batchsize when we test.
// If the value greater than 0, we add this to
// the first dimension of tensor's shape of fluid.
int runtime_batch_size_;
};
} // namespace tensorrt
......
......@@ -113,7 +113,7 @@ TEST_F(TensorRTEngineTest, add_layer_multi_dim) {
ASSERT_EQ(y_cpu[1], 14.5);
}
TEST_F(TensorRTEngineTest, test_conv2d_temp) {
TEST_F(TensorRTEngineTest, test_conv2d) {
// Weight in CPU memory.
float raw_weight[9] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
float raw_bias[1] = {0};
......@@ -146,6 +146,37 @@ TEST_F(TensorRTEngineTest, test_conv2d_temp) {
ASSERT_EQ(y_cpu[1], 6.0);
}
TEST_F(TensorRTEngineTest, test_pool2d) {
// Weight in CPU memory.
auto* x = engine_->DeclareInput("x", nvinfer1::DataType::kFLOAT,
nvinfer1::Dims3{1, 2, 2});
nvinfer1::PoolingType pool_t = nvinfer1::PoolingType::kAVERAGE;
auto* pool_layer =
TRT_ENGINE_ADD_LAYER(engine_, Pooling, *const_cast<nvinfer1::ITensor*>(x),
pool_t, nvinfer1::DimsHW{2, 2});
PADDLE_ENFORCE(pool_layer != nullptr);
pool_layer->setStride(nvinfer1::DimsHW{1, 1});
pool_layer->setPadding(nvinfer1::DimsHW{0, 0});
engine_->DeclareOutput(pool_layer, 0, "y");
engine_->FreezeNetwork();
ASSERT_EQ(engine_->engine()->getNbBindings(), 2);
float x_v[8] = {1.0, 2.0, 5.0, 0.0, 2.0, 3.0, 5.0, 10.0};
engine_->SetInputFromCPU("x", reinterpret_cast<void*>(&x_v),
8 * sizeof(float));
engine_->Execute(2);
LOG(INFO) << "to get output";
float* y_cpu = new float[2];
engine_->GetOutputInCPU("y", &y_cpu[0], 2 * sizeof(float));
ASSERT_EQ(y_cpu[0], 2.0);
ASSERT_EQ(y_cpu[1], 5.0);
}
} // namespace tensorrt
} // namespace inference
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册