From facaca0519291d9b6e6ab06c0eec4f7582303671 Mon Sep 17 00:00:00 2001 From: tink2123 Date: Fri, 31 Dec 2021 13:14:49 +0800 Subject: [PATCH] cherry-pick, mv fasttext for requirements --- doc/doc_ch/recognition.md | 5 ++++- ppocr/data/imaug/operators.py | 2 +- requirements.txt | 1 - 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/doc/doc_ch/recognition.md b/doc/doc_ch/recognition.md index bb7d0171..4019aa12 100644 --- a/doc/doc_ch/recognition.md +++ b/doc/doc_ch/recognition.md @@ -235,7 +235,10 @@ PaddleOCR支持训练和评估交替进行, 可以在 `configs/rec/rec_icdar15_t | rec_r31_sar.yml | SAR | ResNet31 | None | LSTM encoder | LSTM decoder | | rec_resnet_stn_bilstm_att.yml | SEED | Aster_Resnet | STN | BiLSTM | att | -*其中SEED模型需要额外加载FastText训练好的[语言模型](https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.en.300.bin.gz) +*其中SEED模型需要额外加载FastText训练好的[语言模型](https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.en.300.bin.gz) ,并且安装 fasttext 依赖: +``` +python3.7 -m pip install fasttext==0.9.1 +``` 训练中文数据,推荐使用[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml),如您希望尝试其他算法在中文数据集上的效果,请参考下列说明修改配置文件: diff --git a/ppocr/data/imaug/operators.py b/ppocr/data/imaug/operators.py index c3dfd316..daa67a25 100644 --- a/ppocr/data/imaug/operators.py +++ b/ppocr/data/imaug/operators.py @@ -23,7 +23,6 @@ import sys import six import cv2 import numpy as np -import fasttext class DecodeImage(object): @@ -136,6 +135,7 @@ class ToCHWImage(object): class Fasttext(object): def __init__(self, path="None", **kwargs): + import fasttext self.fast_model = fasttext.load_model(path) def __call__(self, data): diff --git a/requirements.txt b/requirements.txt index 9900588b..5e999790 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,5 +12,4 @@ cython lxml premailer openpyxl -fasttext==0.9.1 -- GitLab