diff --git a/core/general-server/op/general_detection_op.cpp b/core/general-server/op/general_detection_op.cpp index 46f5ddf1b508681661b69c60a25b6d7d000e6d4e..b62a2d2544e12d493033cf1bb8e6606d72f614d3 100644 --- a/core/general-server/op/general_detection_op.cpp +++ b/core/general-server/op/general_detection_op.cpp @@ -191,42 +191,64 @@ int GeneralDetectionOp::inference() { boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg); - for (int i = boxes.size() - 1; i >= 0; i--) { - crop_img = GetRotateCropImage(img, boxes[i]); - - float wh_ratio = float(crop_img.cols) / float(crop_img.rows); + float max_wh_ratio = 0.0f; + std::vector crop_imgs; + std::vector resize_imgs; + int max_resize_w = 0; + int max_resize_h = 0; + int box_num = boxes.size(); + std::vector> output_rec; + for (int i = 0; i < box_num; ++i) { + cv::Mat line_img = GetRotateCropImage(img, boxes[i]); + float wh_ratio = float(line_img.cols) / float(line_img.rows); + max_wh_ratio = max_wh_ratio > wh_ratio ? max_wh_ratio : wh_ratio; + crop_imgs.push_back(line_img); + } + for (int i = 0; i < box_num; ++i) { + cv::Mat resize_img; + crop_img = crop_imgs[i]; this->resize_op_rec.Run( - crop_img, resize_img_rec, wh_ratio, this->use_tensorrt_); + crop_img, resize_img, max_wh_ratio, this->use_tensorrt_); this->normalize_op_.Run( - &resize_img_rec, this->mean_rec, this->scale_rec, this->is_scale_); - - std::vector output_rec( - 1 * 3 * resize_img_rec.rows * resize_img_rec.cols, 0.0f); - - this->permute_op_.Run(&resize_img_rec, output_rec.data()); - - // Inference. - output_shape = {1, 3, resize_img_rec.rows, resize_img_rec.cols}; - out_num = std::accumulate( - output_shape.begin(), output_shape.end(), 1, std::multiplies()); - databuf_size_out = out_num * sizeof(float); - databuf_data_out = MempoolWrapper::instance().malloc(databuf_size_out); - if (!databuf_data_out) { - LOG(ERROR) << "Malloc failed, size: " << databuf_size_out; - return -1; - } - memcpy(databuf_data_out, output_rec.data(), databuf_size_out); - databuf_char_out = reinterpret_cast(databuf_data_out); - paddle::PaddleBuf paddleBuf(databuf_char_out, databuf_size_out); - paddle::PaddleTensor tensor_out; - tensor_out.name = "image"; - tensor_out.dtype = paddle::PaddleDType::FLOAT32; - tensor_out.shape = {1, 3, resize_img_rec.rows, resize_img_rec.cols}; - tensor_out.data = paddleBuf; - out->push_back(tensor_out); + &resize_img, this->mean_rec, this->scale_rec, this->is_scale_); + + max_resize_w = std::max(max_resize_w, resize_img.cols); + max_resize_h = std::max(max_resize_h, resize_img.rows); + resize_imgs.push_back(resize_img); + } + int buf_size = 3 * max_resize_h * max_resize_w; + output_rec = std::vector>(box_num, + std::vector(buf_size, 0.0f)); + for (int i = 0; i < box_num; ++i) { + resize_img_rec = resize_imgs[i]; + + this->permute_op_.Run(&resize_img_rec, output_rec[i].data()); + } + + // Inference. + output_shape = {box_num, 3, max_resize_h, max_resize_w}; + out_num = std::accumulate( + output_shape.begin(), output_shape.end(), 1, std::multiplies()); + databuf_size_out = out_num * sizeof(float); + databuf_data_out = MempoolWrapper::instance().malloc(databuf_size_out); + if (!databuf_data_out) { + LOG(ERROR) << "Malloc failed, size: " << databuf_size_out; + return -1; + } + int offset = buf_size * sizeof(float); + for (int i = 0; i < box_num; ++i) { + memcpy(databuf_data_out + i * offset, output_rec[i].data(), offset); } + databuf_char_out = reinterpret_cast(databuf_data_out); + paddle::PaddleBuf paddleBuf(databuf_char_out, databuf_size_out); + paddle::PaddleTensor tensor_out; + tensor_out.name = "image"; + tensor_out.dtype = paddle::PaddleDType::FLOAT32; + tensor_out.shape = output_shape; + tensor_out.data = paddleBuf; + out->push_back(tensor_out); } out->erase(out->begin(), out->begin() + infer_outnum); diff --git a/core/general-server/op/general_detection_op.h b/core/general-server/op/general_detection_op.h index 272ed5ff40575d42ac3058ad1824285925fc252c..2cc027f5ed761f2d040c0c1858e81cb70a93fcb0 100644 --- a/core/general-server/op/general_detection_op.h +++ b/core/general-server/op/general_detection_op.h @@ -63,7 +63,7 @@ class GeneralDetectionOp double det_db_thresh_ = 0.3; double det_db_box_thresh_ = 0.5; - double det_db_unclip_ratio_ = 2.0; + double det_db_unclip_ratio_ = 1.5; std::vector mean_det = {0.485f, 0.456f, 0.406f}; std::vector scale_det = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f}; diff --git a/core/predictor/tools/ocrtools/preprocess_op.cpp b/core/predictor/tools/ocrtools/preprocess_op.cpp index ab69e4d23abbcbebfbfb5c453fbca46ff5e51967..045984e4c004f965d52badc8b8a0b8996224ab7c 100644 --- a/core/predictor/tools/ocrtools/preprocess_op.cpp +++ b/core/predictor/tools/ocrtools/preprocess_op.cpp @@ -82,14 +82,14 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img, else if (resize_h / 32 < 1 + 1e-5) resize_h = 32; else - resize_h = (resize_h / 32) * 32; + resize_h = (resize_h / 32 - 1) * 32; if (resize_w % 32 == 0) resize_w = resize_w; else if (resize_w / 32 < 1 + 1e-5) resize_w = 32; else - resize_w = (resize_w / 32) * 32; + resize_w = (resize_w / 32 - 1) * 32; if (!use_tensorrt) { cv::resize(img, resize_img, cv::Size(resize_w, resize_h)); ratio_h = float(resize_h) / float(h);