提交 01566fb6 编写于 作者: N nhzlx

1. Support multi-batch unit tests 2. Support pool op

上级 21890ca0
...@@ -43,6 +43,8 @@ class Pool2dOpConverter : public OpConverter { ...@@ -43,6 +43,8 @@ class Pool2dOpConverter : public OpConverter {
const nvinfer1::DimsHW nv_strides(strides[0], strides[1]); const nvinfer1::DimsHW nv_strides(strides[0], strides[1]);
const nvinfer1::DimsHW nv_paddings(paddings[0], paddings[1]); const nvinfer1::DimsHW nv_paddings(paddings[0], paddings[1]);
PADDLE_ENFORCE_EQ(input1->getDimensions().nbDims, 3UL);
nvinfer1::PoolingType pool_t = nvinfer1::PoolingType::kMAX; nvinfer1::PoolingType pool_t = nvinfer1::PoolingType::kMAX;
if (pool_type == "max") { if (pool_type == "max") {
pool_t = nvinfer1::PoolingType::kMAX; pool_t = nvinfer1::PoolingType::kMAX;
......
...@@ -23,7 +23,8 @@ namespace tensorrt { ...@@ -23,7 +23,8 @@ namespace tensorrt {
TEST(ReluOpConverter, main) { TEST(ReluOpConverter, main) {
framework::Scope scope; framework::Scope scope;
std::unordered_set<std::string> parameters; std::unordered_set<std::string> parameters;
TRTConvertValidation validator(10, parameters, scope, 1000); int runtime_batch = 3;
TRTConvertValidation validator(10, parameters, scope, 1000, runtime_batch);
validator.DeclInputVar("relu-X", nvinfer1::Dims2(10, 6)); validator.DeclInputVar("relu-X", nvinfer1::Dims2(10, 6));
validator.DeclOutputVar("relu-Out", nvinfer1::Dims2(10, 6)); validator.DeclOutputVar("relu-Out", nvinfer1::Dims2(10, 6));
...@@ -37,7 +38,7 @@ TEST(ReluOpConverter, main) { ...@@ -37,7 +38,7 @@ TEST(ReluOpConverter, main) {
validator.SetOp(*desc.Proto()); validator.SetOp(*desc.Proto());
LOG(INFO) << "execute"; LOG(INFO) << "execute";
validator.Execute(1); validator.Execute(runtime_batch);
} }
} // namespace tensorrt } // namespace tensorrt
......
...@@ -23,10 +23,10 @@ namespace tensorrt { ...@@ -23,10 +23,10 @@ namespace tensorrt {
TEST(fc_op, test) { TEST(fc_op, test) {
std::unordered_set<std::string> parameters({"mul-Y"}); std::unordered_set<std::string> parameters({"mul-Y"});
framework::Scope scope; framework::Scope scope;
TRTConvertValidation validator(10, parameters, scope, 1000); int runtime_batch = 2;
validator.DeclInputVar("mul-X", nvinfer1::Dims4(1, 10, 1, 1)); TRTConvertValidation validator(10, parameters, scope, 1000, runtime_batch);
validator.DeclInputVar("mul-X", nvinfer1::Dims3(10, 1, 1));
validator.DeclParamVar("mul-Y", nvinfer1::Dims2(10, 2)); validator.DeclParamVar("mul-Y", nvinfer1::Dims2(10, 2));
// validator.DeclParamVar("mul-Y", nvinfer1::Dims2(8, 2));
validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(1, 2)); validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(1, 2));
// Prepare Op description // Prepare Op description
...@@ -38,7 +38,7 @@ TEST(fc_op, test) { ...@@ -38,7 +38,7 @@ TEST(fc_op, test) {
validator.SetOp(*desc.Proto()); validator.SetOp(*desc.Proto());
validator.Execute(1); validator.Execute(runtime_batch);
} }
} // namespace tensorrt } // namespace tensorrt
......
...@@ -23,7 +23,8 @@ namespace tensorrt { ...@@ -23,7 +23,8 @@ namespace tensorrt {
TEST(MulOpConverter, main) { TEST(MulOpConverter, main) {
framework::Scope scope; framework::Scope scope;
std::unordered_set<std::string> parameters; std::unordered_set<std::string> parameters;
TRTConvertValidation validator(10, parameters, scope, 1000); int runtime_batch = 0;
TRTConvertValidation validator(10, parameters, scope, 1000, runtime_batch);
validator.DeclInputVar("mul-X", nvinfer1::Dims2(10, 6)); validator.DeclInputVar("mul-X", nvinfer1::Dims2(10, 6));
validator.DeclInputVar("mul-Y", nvinfer1::Dims2(6, 10)); validator.DeclInputVar("mul-Y", nvinfer1::Dims2(6, 10));
validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(10, 10)); validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(10, 10));
......
...@@ -23,9 +23,14 @@ namespace tensorrt { ...@@ -23,9 +23,14 @@ namespace tensorrt {
TEST(Pool2dOpConverter, main) { TEST(Pool2dOpConverter, main) {
framework::Scope scope; framework::Scope scope;
std::unordered_set<std::string> parameters; std::unordered_set<std::string> parameters;
TRTConvertValidation validator(10, parameters, scope, 1000); int runtime_batch = 3;
validator.DeclInputVar("pool2d-X", nvinfer1::Dims4(10, 3, 2, 2)); TRTConvertValidation validator(5, parameters, scope, 1 << 15, runtime_batch);
validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims4(10, 3, 1, 1));
// We have already set the runtime batchsize, so the
// Dims should not contain the batch size.
// The ITensor's Dims of input and output should be C * H * W.
validator.DeclInputVar("pool2d-X", nvinfer1::Dims3(3, 4, 4));
validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims3(3, 2, 2));
// Prepare Op description // Prepare Op description
framework::OpDesc desc; framework::OpDesc desc;
...@@ -34,7 +39,7 @@ TEST(Pool2dOpConverter, main) { ...@@ -34,7 +39,7 @@ TEST(Pool2dOpConverter, main) {
desc.SetOutput("Out", {"pool2d-Out"}); desc.SetOutput("Out", {"pool2d-Out"});
std::vector<int> ksize({2, 2}); std::vector<int> ksize({2, 2});
std::vector<int> strides({1, 1}); std::vector<int> strides({2, 2});
std::vector<int> paddings({0, 0}); std::vector<int> paddings({0, 0});
std::string pooling_t = "max"; std::string pooling_t = "max";
...@@ -42,18 +47,12 @@ TEST(Pool2dOpConverter, main) { ...@@ -42,18 +47,12 @@ TEST(Pool2dOpConverter, main) {
desc.SetAttr("ksize", ksize); desc.SetAttr("ksize", ksize);
desc.SetAttr("strides", strides); desc.SetAttr("strides", strides);
desc.SetAttr("paddings", paddings); desc.SetAttr("paddings", paddings);
// std::string temp = "";
// (*desc.Proto()).SerializeToString(&temp);
// std::cout << temp << std::endl;
// std::ofstream f("__temp__", std::ios::out);
// f << temp;
LOG(INFO) << "set OP"; LOG(INFO) << "set OP";
validator.SetOp(*desc.Proto()); validator.SetOp(*desc.Proto());
LOG(INFO) << "execute"; LOG(INFO) << "execute";
validator.Execute(10); validator.Execute(runtime_batch);
} }
} // namespace tensorrt } // namespace tensorrt
......
...@@ -63,13 +63,15 @@ class TRTConvertValidation { ...@@ -63,13 +63,15 @@ class TRTConvertValidation {
public: public:
TRTConvertValidation() = delete; TRTConvertValidation() = delete;
TRTConvertValidation(int batch_size, TRTConvertValidation(int max_batch_size,
const std::unordered_set<std::string>& parameters, const std::unordered_set<std::string>& parameters,
framework::Scope& scope, // NOLINT framework::Scope& scope, // NOLINT
int workspace_size = 1 << 10) int workspace_size = 1 << 10, int runtime_batch_size = 1)
: parameters_(parameters), scope_(scope) { : parameters_(parameters),
scope_(scope),
runtime_batch_size_(runtime_batch_size) {
// create engine. // create engine.
engine_.reset(new TensorRTEngine(batch_size, workspace_size, &stream_)); engine_.reset(new TensorRTEngine(max_batch_size, workspace_size, &stream_));
engine_->InitNetwork(); engine_->InitNetwork();
PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0); PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0);
...@@ -84,7 +86,7 @@ class TRTConvertValidation { ...@@ -84,7 +86,7 @@ class TRTConvertValidation {
// Declare a parameter variable in the scope. // Declare a parameter variable in the scope.
void DeclParamVar(const std::string& name, const nvinfer1::Dims& dims) { void DeclParamVar(const std::string& name, const nvinfer1::Dims& dims) {
DeclVar(name, dims); DeclVar(name, dims, true);
} }
void DeclOutputVar(const std::string& name, const nvinfer1::Dims& dims) { void DeclOutputVar(const std::string& name, const nvinfer1::Dims& dims) {
...@@ -92,12 +94,20 @@ class TRTConvertValidation { ...@@ -92,12 +94,20 @@ class TRTConvertValidation {
} }
// Declare a variable in a fluid Scope. // Declare a variable in a fluid Scope.
void DeclVar(const std::string& name, const nvinfer1::Dims& dims) { void DeclVar(const std::string& name, const nvinfer1::Dims& dims,
bool is_param = false) {
platform::CPUPlace place; platform::CPUPlace place;
platform::CPUDeviceContext ctx(place); platform::CPUDeviceContext ctx(place);
// Init Fluid tensor. // Init Fluid tensor.
std::vector<int> dim_vec(dims.d, dims.d + dims.nbDims); std::vector<int> dim_vec(dims.d, dims.d + dims.nbDims);
// An ITensor's shape does not contain the batch size, but the fluid
// tensor's shape must. If the variable is not a parameter and the runtime
// batch size is greater than 0, prepend the batch size to dim_vec.
if (is_param != true && runtime_batch_size_ > 0)
dim_vec.insert(dim_vec.begin(), runtime_batch_size_);
auto* x = scope_.Var(name); auto* x = scope_.Var(name);
auto* x_tensor = x->GetMutable<framework::LoDTensor>(); auto* x_tensor = x->GetMutable<framework::LoDTensor>();
x_tensor->Resize(framework::make_ddim(dim_vec)); x_tensor->Resize(framework::make_ddim(dim_vec));
...@@ -167,6 +177,10 @@ class TRTConvertValidation { ...@@ -167,6 +177,10 @@ class TRTConvertValidation {
std::unique_ptr<framework::OpDesc> op_desc_; std::unique_ptr<framework::OpDesc> op_desc_;
const std::unordered_set<std::string>& parameters_; const std::unordered_set<std::string>& parameters_;
framework::Scope& scope_; framework::Scope& scope_;
// The runtime batch size used when testing.
// If it is greater than 0, it is prepended as the first dimension of the
// fluid tensor's shape.
int runtime_batch_size_;
}; };
} // namespace tensorrt } // namespace tensorrt
......
...@@ -113,7 +113,7 @@ TEST_F(TensorRTEngineTest, add_layer_multi_dim) { ...@@ -113,7 +113,7 @@ TEST_F(TensorRTEngineTest, add_layer_multi_dim) {
ASSERT_EQ(y_cpu[1], 14.5); ASSERT_EQ(y_cpu[1], 14.5);
} }
TEST_F(TensorRTEngineTest, test_conv2d_temp) { TEST_F(TensorRTEngineTest, test_conv2d) {
// Weight in CPU memory. // Weight in CPU memory.
float raw_weight[9] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; float raw_weight[9] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
float raw_bias[1] = {0}; float raw_bias[1] = {0};
...@@ -146,6 +146,37 @@ TEST_F(TensorRTEngineTest, test_conv2d_temp) { ...@@ -146,6 +146,37 @@ TEST_F(TensorRTEngineTest, test_conv2d_temp) {
ASSERT_EQ(y_cpu[1], 6.0); ASSERT_EQ(y_cpu[1], 6.0);
} }
// Builds a network with a single 2x2 average-pooling layer, runs it with a
// runtime batch of 2, and checks the pooled value produced for each batch item.
TEST_F(TensorRTEngineTest, test_pool2d) {
  // The declared input shape is 1 x 2 x 2; the batch size is supplied at
  // Execute() time rather than in the dims.
  auto* x = engine_->DeclareInput("x", nvinfer1::DataType::kFLOAT,
                                  nvinfer1::Dims3{1, 2, 2});

  const nvinfer1::PoolingType pool_t = nvinfer1::PoolingType::kAVERAGE;
  auto* pool_layer =
      TRT_ENGINE_ADD_LAYER(engine_, Pooling, *const_cast<nvinfer1::ITensor*>(x),
                           pool_t, nvinfer1::DimsHW{2, 2});

  PADDLE_ENFORCE(pool_layer != nullptr);
  pool_layer->setStride(nvinfer1::DimsHW{1, 1});
  pool_layer->setPadding(nvinfer1::DimsHW{0, 0});

  engine_->DeclareOutput(pool_layer, 0, "y");
  engine_->FreezeNetwork();
  // One binding for the input "x" and one for the output "y".
  ASSERT_EQ(engine_->engine()->getNbBindings(), 2);

  // Two batch items of 1 x 2 x 2 values each: {1, 2, 5, 0} and {2, 3, 5, 10}.
  float x_v[8] = {1.0, 2.0, 5.0, 0.0, 2.0, 3.0, 5.0, 10.0};
  engine_->SetInputFromCPU("x", reinterpret_cast<void*>(&x_v),
                           8 * sizeof(float));
  engine_->Execute(2);

  LOG(INFO) << "to get output";
  // Stack storage instead of `new float[2]`: the heap buffer was never
  // released, and an early ASSERT_EQ return would have leaked it as well.
  float y_cpu[2] = {0.0f, 0.0f};
  engine_->GetOutputInCPU("y", &y_cpu[0], 2 * sizeof(float));

  // The 2x2 window covers the whole 2x2 plane, so each batch item yields the
  // mean of its four values: (1+2+5+0)/4 = 2 and (2+3+5+10)/4 = 5.
  ASSERT_EQ(y_cpu[0], 2.0);
  ASSERT_EQ(y_cpu[1], 5.0);
}
} // namespace tensorrt } // namespace tensorrt
} // namespace inference } // namespace inference
} // namespace paddle } // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册