提交 a3ed9b00，作者：guosheng

Refine docs of reader in Transformer by following comments

上级 f0fc20ee
...@@ -64,8 +64,7 @@ class Pool(object): ...@@ -64,8 +64,7 @@ class Pool(object):
class DataReader(object): class DataReader(object):
""" """
The data reader loads all data from files and produces batches of data The data reader loads all data from files and produces batches of data
in the way corresponding to settings. See the doc of __init__ function in the way corresponding to settings.
for more setting details.
An example of returning a generator producing data batches whose data An example of returning a generator producing data batches whose data
is shuffled in each pass and sorted in each pool: is shuffled in each pass and sorted in each pool:
...@@ -86,29 +85,6 @@ class DataReader(object): ...@@ -86,29 +85,6 @@ class DataReader(object):
unk_mark='<unk>', unk_mark='<unk>',
clip_last_batch=False).batch_generator clip_last_batch=False).batch_generator
``` ```
"""
def __init__(self,
src_vocab_fpath,
trg_vocab_fpath,
fpattern,
batch_size,
pool_size,
sort_type=SortType.NONE,
clip_last_batch=True,
tar_fname=None,
min_length=0,
max_length=100,
shuffle=True,
shuffle_batch=False,
use_token_batch=False,
delimiter="\t",
start_mark="<s>",
end_mark="<e>",
unk_mark="<unk>",
seed=0):
"""
Load all data from files and set the settings to make mini-batches.
:param src_vocab_fpath: The path of vocabulary file of source language. :param src_vocab_fpath: The path of vocabulary file of source language.
:type src_vocab_fpath: basestring :type src_vocab_fpath: basestring
...@@ -154,6 +130,26 @@ class DataReader(object): ...@@ -154,6 +130,26 @@ class DataReader(object):
:param seed: The seed for random. :param seed: The seed for random.
:type seed: int :type seed: int
""" """
def __init__(self,
src_vocab_fpath,
trg_vocab_fpath,
fpattern,
batch_size,
pool_size,
sort_type=SortType.NONE,
clip_last_batch=True,
tar_fname=None,
min_length=0,
max_length=100,
shuffle=True,
shuffle_batch=False,
use_token_batch=False,
delimiter="\t",
start_mark="<s>",
end_mark="<e>",
unk_mark="<unk>",
seed=0):
self._src_vocab = self.load_dict(src_vocab_fpath) self._src_vocab = self.load_dict(src_vocab_fpath)
self._only_src = True self._only_src = True
if trg_vocab_fpath is not None: if trg_vocab_fpath is not None:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册