From dce498cebf89d390586170cee62c650785723f07 Mon Sep 17 00:00:00 2001 From: liu zhengxi <380185688@qq.com> Date: Wed, 16 Dec 2020 12:40:06 +0800 Subject: [PATCH] alter the default value for vocab (#5062) --- PaddleNLP/paddlenlp/data/vocab.py | 48 +++++++++++++++---------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/PaddleNLP/paddlenlp/data/vocab.py b/PaddleNLP/paddlenlp/data/vocab.py index 8d947129..d83083b3 100644 --- a/PaddleNLP/paddlenlp/data/vocab.py +++ b/PaddleNLP/paddlenlp/data/vocab.py @@ -36,14 +36,14 @@ class Vocab(object): between tokens and indices to be used. If provided, adjust the tokens and indices mapping according to it. If None, counter must be provided. Default: None. - unk_token (str): special token for unknow token. If no need, it also - could be None. Default: '<unk>'. - pad_token (str): special token for padding token. If no need, it also - could be None. Default: '<pad>'. - bos_token (str): special token for bos token. If no need, it also - could be None. Default: '<bos>. - eos_token (str): special token for eos token. If no need, it also - could be None. Default: '<eos>'. + unk_token (str): special token for unknow token '<unk>'. If no need, it also + could be None. Default: None. + pad_token (str): special token for padding token '<pad>'. If no need, it also + could be None. Default: None. + bos_token (str): special token for bos token '<bos>'. If no need, it also + could be None. Default: None. + eos_token (str): special token for eos token '<eos>'. If no need, it also + could be None. Default: None. **kwargs (dict): Keyword arguments ending with `_token`. It can be used to specify further special tokens that will be exposed as attribute of the vocabulary and associated with an index. 
@@ -54,10 +54,10 @@ class Vocab(object): max_size=None, min_freq=1, token_to_idx=None, - unk_token='<unk>', - pad_token='<pad>', - bos_token='<bos>', - eos_token='<eos>', + unk_token=None, + pad_token=None, + bos_token=None, + eos_token=None, **kwargs): # Handle special tokens combs = (('unk_token', unk_token), ('pad_token', pad_token), @@ -317,10 +317,10 @@ class Vocab(object): max_size=None, min_freq=1, token_to_idx=None, - unk_token='<unk>', - pad_token='<pad>', - bos_token='<bos>', - eos_token='<eos>', + unk_token=None, + pad_token=None, + bos_token=None, + eos_token=None, **kwargs): """ Building vocab accoring to given iterator and other information. Iterate @@ -333,14 +333,14 @@ class Vocab(object): between tokens and indices to be used. If provided, adjust the tokens and indices mapping according to it. If None, counter must be provided. Default: None. - unk_token (str): special token for unknow token. If no need, it also - could be None. Default: '<unk>'. - pad_token (str): special token for padding token. If no need, it also - could be None. Default: '<pad>'. - bos_token (str): special token for bos token. If no need, it also - could be None. Default: '<bos>. - eos_token (str): special token for eos token. If no need, it also - could be None. Default: '<eos>'. + unk_token (str): special token for unknow token '<unk>'. If no need, it also + could be None. Default: None. + pad_token (str): special token for padding token '<pad>'. If no need, it also + could be None. Default: None. + bos_token (str): special token for bos token '<bos>'. If no need, it also + could be None. Default: None. + eos_token (str): special token for eos token '<eos>'. If no need, it also + could be None. Default: None. **kwargs (dict): Keyword arguments ending with `_token`. It can be used to specify further special tokens that will be exposed as attribute of the vocabulary and associated with an index. -- GitLab