From fda276b12cee655d378977f26ae32ea167bb8553 Mon Sep 17 00:00:00 2001 From: KP <109694228@qq.com> Date: Tue, 8 Jun 2021 20:42:47 +0800 Subject: [PATCH] Update dependencies of machine translation modules --- .../transformer/en-de/requirements.txt | 2 +- .../text/machine_translation/transformer/en-de/utils.py | 9 +-------- .../transformer/zh-en/requirements.txt | 2 +- .../text/machine_translation/transformer/zh-en/utils.py | 9 +-------- 4 files changed, 4 insertions(+), 18 deletions(-) diff --git a/modules/text/machine_translation/transformer/en-de/requirements.txt b/modules/text/machine_translation/transformer/en-de/requirements.txt index 270d8d38..adf3e7fe 100644 --- a/modules/text/machine_translation/transformer/en-de/requirements.txt +++ b/modules/text/machine_translation/transformer/en-de/requirements.txt @@ -1,2 +1,2 @@ -nltk==3.4.5 +sacremoses subword-nmt diff --git a/modules/text/machine_translation/transformer/en-de/utils.py b/modules/text/machine_translation/transformer/en-de/utils.py index cfd8e7f0..761f656b 100644 --- a/modules/text/machine_translation/transformer/en-de/utils.py +++ b/modules/text/machine_translation/transformer/en-de/utils.py @@ -16,14 +16,7 @@ import re from typing import List import codecs -try: - import nltk - nltk.data.find('misc/perluniprops') - nltk.data.find('corpora/nonbreaking_prefixes') -except LookupError: - nltk.download('perluniprops') - nltk.download('nonbreaking_prefixes') -from nltk.tokenize.moses import MosesTokenizer, MosesDetokenizer +from sacremoses import MosesTokenizer, MosesDetokenizer from subword_nmt.apply_bpe import BPE diff --git a/modules/text/machine_translation/transformer/zh-en/requirements.txt b/modules/text/machine_translation/transformer/zh-en/requirements.txt index e0442c89..6029eb21 100644 --- a/modules/text/machine_translation/transformer/zh-en/requirements.txt +++ b/modules/text/machine_translation/transformer/zh-en/requirements.txt @@ -1,3 +1,3 @@ -nltk==3.4.5 jieba +sacremoses subword-nmt diff --git a/modules/text/machine_translation/transformer/zh-en/utils.py b/modules/text/machine_translation/transformer/zh-en/utils.py index 6bb05406..8a556ac6 100644 --- a/modules/text/machine_translation/transformer/zh-en/utils.py +++ b/modules/text/machine_translation/transformer/zh-en/utils.py @@ -21,14 +21,7 @@ import jieba jieba.setLogLevel(logging.INFO) -try: - import nltk - nltk.data.find('misc/perluniprops') - nltk.data.find('corpora/nonbreaking_prefixes') -except LookupError: - nltk.download('perluniprops') - nltk.download('nonbreaking_prefixes') -from nltk.tokenize.moses import MosesDetokenizer +from sacremoses import MosesTokenizer, MosesDetokenizer from subword_nmt.apply_bpe import BPE -- GitLab