提交 c2cf5351 编写于 作者: qq_25193841's avatar qq_25193841

Merge remote-tracking branch 'upstream/dygraph' into dy1

...@@ -8,7 +8,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式 ...@@ -8,7 +8,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式
- 静态图版本:develop分支 - 静态图版本:develop分支
**近期更新** **近期更新**
- 2021.1.4 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,总数142个,每周一都会更新,欢迎大家持续关注。 - 2021.1.11 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,总数147个,每周一都会更新,欢迎大家持续关注。
- 2020.12.15 更新数据合成工具[Style-Text](./StyleText/README_ch.md),可以批量合成大量与目标场景类似的图像,在多个场景验证,效果明显提升。 - 2020.12.15 更新数据合成工具[Style-Text](./StyleText/README_ch.md),可以批量合成大量与目标场景类似的图像,在多个场景验证,效果明显提升。
- 2020.11.25 更新半自动标注工具[PPOCRLabel](./PPOCRLabel/README_ch.md),辅助开发者高效完成标注任务,输出格式与PP-OCR训练任务完美衔接。 - 2020.11.25 更新半自动标注工具[PPOCRLabel](./PPOCRLabel/README_ch.md),辅助开发者高效完成标注任务,输出格式与PP-OCR训练任务完美衔接。
- 2020.9.22 更新PP-OCR技术文章,https://arxiv.org/abs/2009.09941 - 2020.9.22 更新PP-OCR技术文章,https://arxiv.org/abs/2009.09941
...@@ -102,7 +102,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式 ...@@ -102,7 +102,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式
- FAQ - FAQ
- [【精选】OCR精选10个问题](./doc/doc_ch/FAQ.md) - [【精选】OCR精选10个问题](./doc/doc_ch/FAQ.md)
- [【理论篇】OCR通用31个问题](./doc/doc_ch/FAQ.md) - [【理论篇】OCR通用31个问题](./doc/doc_ch/FAQ.md)
- [【实战篇】PaddleOCR实战101个问题](./doc/doc_ch/FAQ.md) - [【实战篇】PaddleOCR实战106个问题](./doc/doc_ch/FAQ.md)
- [技术交流群](#欢迎加入PaddleOCR技术交流群) - [技术交流群](#欢迎加入PaddleOCR技术交流群)
- [参考文献](./doc/doc_ch/reference.md) - [参考文献](./doc/doc_ch/reference.md)
- [许可证书](#许可证书) - [许可证书](#许可证书)
......
...@@ -52,7 +52,7 @@ If you save the model in another location, please modify the address of the mode ...@@ -52,7 +52,7 @@ If you save the model in another location, please modify the address of the mode
``` ```
bg_generator: bg_generator:
pretrain: style_text_rec/bg_generator pretrain: style_text_models/bg_generator
... ...
text_generator: text_generator:
pretrain: style_text_models/text_generator pretrain: style_text_models/text_generator
......
...@@ -102,6 +102,7 @@ Train: ...@@ -102,6 +102,7 @@ Train:
drop_last: False drop_last: False
batch_size_per_card: 16 batch_size_per_card: 16
num_workers: 8 num_workers: 8
use_shared_memory: False
Eval: Eval:
dataset: dataset:
...@@ -128,4 +129,5 @@ Eval: ...@@ -128,4 +129,5 @@ Eval:
shuffle: False shuffle: False
drop_last: False drop_last: False
batch_size_per_card: 1 # must be 1 batch_size_per_card: 1 # must be 1
num_workers: 8 num_workers: 8
\ No newline at end of file use_shared_memory: False
\ No newline at end of file
...@@ -76,6 +76,7 @@ Train: ...@@ -76,6 +76,7 @@ Train:
batch_size_per_card: 256 batch_size_per_card: 256
drop_last: True drop_last: True
num_workers: 8 num_workers: 8
use_shared_memory: False
Eval: Eval:
dataset: dataset:
...@@ -96,3 +97,4 @@ Eval: ...@@ -96,3 +97,4 @@ Eval:
drop_last: False drop_last: False
batch_size_per_card: 256 batch_size_per_card: 256
num_workers: 4 num_workers: 4
use_shared_memory: False
...@@ -47,18 +47,20 @@ public: ...@@ -47,18 +47,20 @@ public:
class ResizeImgType0 { class ResizeImgType0 {
public: public:
virtual void Run(const cv::Mat &img, cv::Mat &resize_img, int max_size_len, virtual void Run(const cv::Mat &img, cv::Mat &resize_img, int max_size_len,
float &ratio_h, float &ratio_w); float &ratio_h, float &ratio_w, bool use_tensorrt);
}; };
class CrnnResizeImg { class CrnnResizeImg {
public: public:
virtual void Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio, virtual void Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio,
bool use_tensorrt = false,
const std::vector<int> &rec_image_shape = {3, 32, 320}); const std::vector<int> &rec_image_shape = {3, 32, 320});
}; };
class ClsResizeImg { class ClsResizeImg {
public: public:
virtual void Run(const cv::Mat &img, cv::Mat &resize_img, virtual void Run(const cv::Mat &img, cv::Mat &resize_img,
bool use_tensorrt = false,
const std::vector<int> &rec_image_shape = {3, 48, 192}); const std::vector<int> &rec_image_shape = {3, 48, 192});
}; };
......
...@@ -77,7 +77,7 @@ int main(int argc, char **argv) { ...@@ -77,7 +77,7 @@ int main(int argc, char **argv) {
auto end = std::chrono::system_clock::now(); auto end = std::chrono::system_clock::now();
auto duration = auto duration =
std::chrono::duration_cast<std::chrono::microseconds>(end - start); std::chrono::duration_cast<std::chrono::microseconds>(end - start);
std::cout << "Cost" std::cout << "Cost "
<< double(duration.count()) * << double(duration.count()) *
std::chrono::microseconds::period::num / std::chrono::microseconds::period::num /
std::chrono::microseconds::period::den std::chrono::microseconds::period::den
......
...@@ -25,7 +25,7 @@ cv::Mat Classifier::Run(cv::Mat &img) { ...@@ -25,7 +25,7 @@ cv::Mat Classifier::Run(cv::Mat &img) {
int index = 0; int index = 0;
float wh_ratio = float(img.cols) / float(img.rows); float wh_ratio = float(img.cols) / float(img.rows);
this->resize_op_.Run(img, resize_img, cls_image_shape); this->resize_op_.Run(img, resize_img, this->use_tensorrt_, cls_image_shape);
this->normalize_op_.Run(&resize_img, this->mean_, this->scale_, this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
this->is_scale_); this->is_scale_);
......
...@@ -61,7 +61,8 @@ void DBDetector::Run(cv::Mat &img, ...@@ -61,7 +61,8 @@ void DBDetector::Run(cv::Mat &img,
cv::Mat srcimg; cv::Mat srcimg;
cv::Mat resize_img; cv::Mat resize_img;
img.copyTo(srcimg); img.copyTo(srcimg);
this->resize_op_.Run(img, resize_img, this->max_side_len_, ratio_h, ratio_w); this->resize_op_.Run(img, resize_img, this->max_side_len_, ratio_h, ratio_w,
this->use_tensorrt_);
this->normalize_op_.Run(&resize_img, this->mean_, this->scale_, this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
this->is_scale_); this->is_scale_);
......
...@@ -33,7 +33,7 @@ void CRNNRecognizer::Run(std::vector<std::vector<std::vector<int>>> boxes, ...@@ -33,7 +33,7 @@ void CRNNRecognizer::Run(std::vector<std::vector<std::vector<int>>> boxes,
float wh_ratio = float(crop_img.cols) / float(crop_img.rows); float wh_ratio = float(crop_img.cols) / float(crop_img.rows);
this->resize_op_.Run(crop_img, resize_img, wh_ratio); this->resize_op_.Run(crop_img, resize_img, wh_ratio, this->use_tensorrt_);
this->normalize_op_.Run(&resize_img, this->mean_, this->scale_, this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
this->is_scale_); this->is_scale_);
......
...@@ -60,7 +60,8 @@ void Normalize::Run(cv::Mat *im, const std::vector<float> &mean, ...@@ -60,7 +60,8 @@ void Normalize::Run(cv::Mat *im, const std::vector<float> &mean,
} }
void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img, void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
int max_size_len, float &ratio_h, float &ratio_w) { int max_size_len, float &ratio_h, float &ratio_w,
bool use_tensorrt) {
int w = img.cols; int w = img.cols;
int h = img.rows; int h = img.rows;
...@@ -89,14 +90,19 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img, ...@@ -89,14 +90,19 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
resize_w = 32; resize_w = 32;
else else
resize_w = (resize_w / 32) * 32; resize_w = (resize_w / 32) * 32;
if (!use_tensorrt) {
cv::resize(img, resize_img, cv::Size(resize_w, resize_h)); cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
ratio_h = float(resize_h) / float(h);
ratio_h = float(resize_h) / float(h); ratio_w = float(resize_w) / float(w);
ratio_w = float(resize_w) / float(w); } else {
cv::resize(img, resize_img, cv::Size(640, 640));
ratio_h = float(640) / float(h);
ratio_w = float(640) / float(w);
}
} }
void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio, void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio,
bool use_tensorrt,
const std::vector<int> &rec_image_shape) { const std::vector<int> &rec_image_shape) {
int imgC, imgH, imgW; int imgC, imgH, imgW;
imgC = rec_image_shape[0]; imgC = rec_image_shape[0];
...@@ -111,12 +117,27 @@ void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio, ...@@ -111,12 +117,27 @@ void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio,
resize_w = imgW; resize_w = imgW;
else else
resize_w = int(ceilf(imgH * ratio)); resize_w = int(ceilf(imgH * ratio));
if (!use_tensorrt) {
cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f, cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
cv::INTER_LINEAR); cv::INTER_LINEAR);
cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0,
int(imgW - resize_img.cols), cv::BORDER_CONSTANT,
{127, 127, 127});
} else {
int k = int(img.cols * 32 / img.rows);
if (k >= 100) {
cv::resize(img, resize_img, cv::Size(100, 32), 0.f, 0.f,
cv::INTER_LINEAR);
} else {
cv::resize(img, resize_img, cv::Size(k, 32), 0.f, 0.f, cv::INTER_LINEAR);
cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, int(100 - k),
cv::BORDER_CONSTANT, {127, 127, 127});
}
}
} }
void ClsResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, void ClsResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img,
bool use_tensorrt,
const std::vector<int> &rec_image_shape) { const std::vector<int> &rec_image_shape) {
int imgC, imgH, imgW; int imgC, imgH, imgW;
imgC = rec_image_shape[0]; imgC = rec_image_shape[0];
...@@ -130,11 +151,15 @@ void ClsResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, ...@@ -130,11 +151,15 @@ void ClsResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img,
else else
resize_w = int(ceilf(imgH * ratio)); resize_w = int(ceilf(imgH * ratio));
cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f, if (!use_tensorrt) {
cv::INTER_LINEAR); cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
if (resize_w < imgW) { cv::INTER_LINEAR);
cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, imgW - resize_w, if (resize_w < imgW) {
cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0)); cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, imgW - resize_w,
cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
}
} else {
cv::resize(img, resize_img, cv::Size(100, 32), 0.f, 0.f, cv::INTER_LINEAR);
} }
} }
......
此差异已折叠。
...@@ -21,13 +21,13 @@ PaddleOCR开源的文本检测算法列表: ...@@ -21,13 +21,13 @@ PaddleOCR开源的文本检测算法列表:
|EAST|MobileNetV3|78.24%|79.15%|78.69%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar)| |EAST|MobileNetV3|78.24%|79.15%|78.69%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar)|
|DB|ResNet50_vd|86.41%|78.72%|82.38%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)| |DB|ResNet50_vd|86.41%|78.72%|82.38%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)|
|DB|MobileNetV3|77.29%|73.08%|75.12%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)| |DB|MobileNetV3|77.29%|73.08%|75.12%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)|
|SAST|ResNet50_vd|91.83%|81.80%|86.52%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar)| |SAST|ResNet50_vd|91.39%|83.77%|87.42%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar)|
在Total-text文本检测公开数据集上,算法效果如下: 在Total-text文本检测公开数据集上,算法效果如下:
|模型|骨干网络|precision|recall|Hmean|下载链接| |模型|骨干网络|precision|recall|Hmean|下载链接|
| --- | --- | --- | --- | --- | --- | | --- | --- | --- | --- | --- | --- |
|SAST|ResNet50_vd|89.05%|76.80%|82.47%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_totaltext_v2.0_train.tar)| |SAST|ResNet50_vd|89.63%|78.44%|83.66%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_totaltext_v2.0_train.tar)|
**说明:** SAST模型训练额外加入了icdar2013、icdar2017、COCO-Text、ArT等公开数据集进行调优。PaddleOCR用到的经过整理格式的英文公开数据集下载:[百度云地址](https://pan.baidu.com/s/12cPnZcVuV1zn5DOd4mqjVw) (提取码: 2bpi) **说明:** SAST模型训练额外加入了icdar2013、icdar2017、COCO-Text、ArT等公开数据集进行调优。PaddleOCR用到的经过整理格式的英文公开数据集下载:[百度云地址](https://pan.baidu.com/s/12cPnZcVuV1zn5DOd4mqjVw) (提取码: 2bpi)
......
...@@ -23,13 +23,13 @@ On the ICDAR2015 dataset, the text detection result is as follows: ...@@ -23,13 +23,13 @@ On the ICDAR2015 dataset, the text detection result is as follows:
|EAST|MobileNetV3|78.24%|79.15%|78.69%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar)| |EAST|MobileNetV3|78.24%|79.15%|78.69%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar)|
|DB|ResNet50_vd|86.41%|78.72%|82.38%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)| |DB|ResNet50_vd|86.41%|78.72%|82.38%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)|
|DB|MobileNetV3|77.29%|73.08%|75.12%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)| |DB|MobileNetV3|77.29%|73.08%|75.12%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)|
|SAST|ResNet50_vd|91.83%|81.80%|86.52%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar)| |SAST|ResNet50_vd|91.39%|83.77%|87.42%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar)|
On Total-Text dataset, the text detection result is as follows: On Total-Text dataset, the text detection result is as follows:
|Model|Backbone|precision|recall|Hmean|Download link| |Model|Backbone|precision|recall|Hmean|Download link|
| --- | --- | --- | --- | --- | --- | | --- | --- | --- | --- | --- | --- |
|SAST|ResNet50_vd|89.05%|76.80%|82.47%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_totaltext_v2.0_train.tar)| |SAST|ResNet50_vd|89.63%|78.44%|83.66%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_totaltext_v2.0_train.tar)|
**Note:** Additional data, like icdar2013, icdar2017, COCO-Text, ArT, was added to the model training of SAST. Download English public dataset in organized format used by PaddleOCR from [Baidu Drive](https://pan.baidu.com/s/12cPnZcVuV1zn5DOd4mqjVw) (download code: 2bpi). **Note:** Additional data, like icdar2013, icdar2017, COCO-Text, ArT, was added to the model training of SAST. Download English public dataset in organized format used by PaddleOCR from [Baidu Drive](https://pan.baidu.com/s/12cPnZcVuV1zn5DOd4mqjVw) (download code: 2bpi).
......
...@@ -66,8 +66,10 @@ def build_dataloader(config, mode, device, logger): ...@@ -66,8 +66,10 @@ def build_dataloader(config, mode, device, logger):
batch_size = loader_config['batch_size_per_card'] batch_size = loader_config['batch_size_per_card']
drop_last = loader_config['drop_last'] drop_last = loader_config['drop_last']
num_workers = loader_config['num_workers'] num_workers = loader_config['num_workers']
if 'use_shared_memory' in loader_config.keys():
use_shared_memory = False use_shared_memory = loader_config['use_shared_memory']
else:
use_shared_memory = True
if mode == "Train": if mode == "Train":
#Distribute data to multiple cards #Distribute data to multiple cards
batch_sampler = DistributedBatchSampler( batch_sampler = DistributedBatchSampler(
...@@ -75,7 +77,6 @@ def build_dataloader(config, mode, device, logger): ...@@ -75,7 +77,6 @@ def build_dataloader(config, mode, device, logger):
batch_size=batch_size, batch_size=batch_size,
shuffle=False, shuffle=False,
drop_last=drop_last) drop_last=drop_last)
use_shared_memory = True
else: else:
#Distribute data to single card #Distribute data to single card
batch_sampler = BatchSampler( batch_sampler = BatchSampler(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册