提交 a3ed9b00,作者:guosheng

Refine docs of reader in Transformer by following comments

上级 f0fc20ee
......@@ -64,8 +64,7 @@ class Pool(object):
class DataReader(object):
"""
The data reader loads all data from files and produces batches of data
-in the way corresponding to settings. See the doc of __init__ function
-for more setting details.
+in the way corresponding to settings.
An example of returning a generator producing data batches whose data
is shuffled in each pass and sorted in each pool:
......@@ -86,29 +85,6 @@ class DataReader(object):
unk_mark='<unk>',
clip_last_batch=False).batch_generator
```
"""
def __init__(self,
src_vocab_fpath,
trg_vocab_fpath,
fpattern,
batch_size,
pool_size,
sort_type=SortType.NONE,
clip_last_batch=True,
tar_fname=None,
min_length=0,
max_length=100,
shuffle=True,
shuffle_batch=False,
use_token_batch=False,
delimiter="\t",
start_mark="<s>",
end_mark="<e>",
unk_mark="<unk>",
seed=0):
"""
Load all data from files and set the settings to make mini-batches.
:param src_vocab_fpath: The path of vocabulary file of source language.
:type src_vocab_fpath: basestring
......@@ -154,6 +130,26 @@ class DataReader(object):
:param seed: The seed for random.
:type seed: int
"""
def __init__(self,
src_vocab_fpath,
trg_vocab_fpath,
fpattern,
batch_size,
pool_size,
sort_type=SortType.NONE,
clip_last_batch=True,
tar_fname=None,
min_length=0,
max_length=100,
shuffle=True,
shuffle_batch=False,
use_token_batch=False,
delimiter="\t",
start_mark="<s>",
end_mark="<e>",
unk_mark="<unk>",
seed=0):
self._src_vocab = self.load_dict(src_vocab_fpath)
self._only_src = True
if trg_vocab_fpath is not None:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册