diff --git a/modules/text/machine_translation/transformer/en-de/requirements.txt b/modules/text/machine_translation/transformer/en-de/requirements.txt index 270d8d38e7f8c0167f3033159c9f13a671bcb4a3..adf3e7fe61baa839a71c8b276b752c3ad2148ca4 100644 --- a/modules/text/machine_translation/transformer/en-de/requirements.txt +++ b/modules/text/machine_translation/transformer/en-de/requirements.txt @@ -1,2 +1,2 @@ -nltk==3.4.5 +sacremoses subword-nmt diff --git a/modules/text/machine_translation/transformer/en-de/utils.py b/modules/text/machine_translation/transformer/en-de/utils.py index cfd8e7f034786753c70e8704ac27901cf377d912..761f656b00d4533d469d29eecb2eef552305a2d8 100644 --- a/modules/text/machine_translation/transformer/en-de/utils.py +++ b/modules/text/machine_translation/transformer/en-de/utils.py @@ -16,14 +16,7 @@ import re from typing import List import codecs -try: - import nltk - nltk.data.find('misc/perluniprops') - nltk.data.find('corpora/nonbreaking_prefixes') -except LookupError: - nltk.download('perluniprops') - nltk.download('nonbreaking_prefixes') -from nltk.tokenize.moses import MosesTokenizer, MosesDetokenizer +from sacremoses import MosesTokenizer, MosesDetokenizer from subword_nmt.apply_bpe import BPE diff --git a/modules/text/machine_translation/transformer/zh-en/requirements.txt b/modules/text/machine_translation/transformer/zh-en/requirements.txt index e0442c89f12f991b8d9c3a7fa5aa1e98915abf34..6029eb21ad870229e0cb41e4462cf741227e52e2 100644 --- a/modules/text/machine_translation/transformer/zh-en/requirements.txt +++ b/modules/text/machine_translation/transformer/zh-en/requirements.txt @@ -1,3 +1,3 @@ -nltk==3.4.5 jieba +sacremoses subword-nmt diff --git a/modules/text/machine_translation/transformer/zh-en/utils.py b/modules/text/machine_translation/transformer/zh-en/utils.py index 6bb054068b0d3dbe1f7383f7d2d0ea577ea3679c..8a556ac668748d3e8ad031c994c295374ee27340 100644 --- a/modules/text/machine_translation/transformer/zh-en/utils.py +++ b/modules/text/machine_translation/transformer/zh-en/utils.py @@ -21,14 +21,7 @@ import jieba jieba.setLogLevel(logging.INFO) -try: - import nltk - nltk.data.find('misc/perluniprops') - nltk.data.find('corpora/nonbreaking_prefixes') -except LookupError: - nltk.download('perluniprops') - nltk.download('nonbreaking_prefixes') -from nltk.tokenize.moses import MosesDetokenizer +from sacremoses import MosesTokenizer, MosesDetokenizer from subword_nmt.apply_bpe import BPE