diff --git a/doc/doc_ch/recognition.md b/doc/doc_ch/recognition.md index bb7d01712a85c92a02109e41814059e6c98c7cdc..4019aa123f6bf1ab0e890b68fbbcd008eb86e440 100644 --- a/doc/doc_ch/recognition.md +++ b/doc/doc_ch/recognition.md @@ -235,7 +235,10 @@ PaddleOCR支持训练和评估交替进行, 可以在 `configs/rec/rec_icdar15_t | rec_r31_sar.yml | SAR | ResNet31 | None | LSTM encoder | LSTM decoder | | rec_resnet_stn_bilstm_att.yml | SEED | Aster_Resnet | STN | BiLSTM | att | -*其中SEED模型需要额外加载FastText训练好的[语言模型](https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.en.300.bin.gz) +*其中SEED模型需要额外加载FastText训练好的[语言模型](https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.en.300.bin.gz) ,并且安装 fasttext 依赖: +``` +python3.7 -m pip install fasttext==0.9.1 +``` 训练中文数据,推荐使用[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml),如您希望尝试其他算法在中文数据集上的效果,请参考下列说明修改配置文件: diff --git a/ppocr/data/imaug/operators.py b/ppocr/data/imaug/operators.py index c3dfd316f86d88b5c7fd52eb6ae23d22a4dd32eb..daa67a25dae93dde74fc0b92aad4aa6ef4d4c003 100644 --- a/ppocr/data/imaug/operators.py +++ b/ppocr/data/imaug/operators.py @@ -23,7 +23,6 @@ import sys import six import cv2 import numpy as np -import fasttext class DecodeImage(object): @@ -136,6 +135,7 @@ class ToCHWImage(object): class Fasttext(object): def __init__(self, path="None", **kwargs): + import fasttext self.fast_model = fasttext.load_model(path) def __call__(self, data): diff --git a/requirements.txt b/requirements.txt index 9900588b25df99e0853ec4521f0632578c55f530..5e999790397c1530f251993e9f5b5d0f2d67b753 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,5 +12,4 @@ cython lxml premailer openpyxl -fasttext==0.9.1