提交 ede001e8 编写于 作者: H Hui Zhang

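fix dataset filter: the dev/test length and ratio limits were written as bare annotations (`config.data.x: value`), which assign nothing; set them with `=` and use float('inf') for the upper bounds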

上级 3a2c722d
......@@ -145,11 +145,11 @@ class DeepSpeech2Trainer(Trainer):
config.data.manifest = config.data.dev_manifest
config.data.augmentation_config = ""
config.data.min_input_len = 0.0 # second
config.data.max_input_len: 100.0 # second
config.data.min_output_len: 0.0 # tokens
config.data.max_output_len: 400.0 # tokens
config.data.min_output_input_ratio: 0.00
config.data.max_output_input_ratio: 100.0
config.data.max_input_len = float('inf') # second
config.data.min_output_len = 0.0 # tokens
config.data.max_output_len = float('inf') # tokens
config.data.min_output_input_ratio = 0.00
config.data.max_output_input_ratio = float('inf')
dev_dataset = ManifestDataset.from_config(config)
if self.parallel:
......@@ -330,11 +330,11 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
config.data.keep_transcription_text = True
config.data.augmentation_config = ""
config.data.min_input_len = 0.0 # second
config.data.max_input_len: 100.0 # second
config.data.min_output_len: 0.0 # tokens
config.data.max_output_len: 400.0 # tokens
config.data.min_output_input_ratio: 0.00
config.data.max_output_input_ratio: 100.0
config.data.max_input_len = float('inf') # second
config.data.min_output_len = 0.0 # tokens
config.data.max_output_len = float('inf') # tokens
config.data.min_output_input_ratio = 0.00
config.data.max_output_input_ratio = float('inf')
test_dataset = ManifestDataset.from_config(config)
# return text ord id
......
......@@ -215,14 +215,14 @@ class U2Trainer(Trainer):
config.data.manifest = config.data.train_manifest
train_dataset = ManifestDataset.from_config(config)
config.data.manifest = config.data.dev_manifest
config.data.augmentation_config = ""
config.data.min_input_len = 0.0 # second
config.data.max_input_len: 100.0 # second
config.data.min_output_len: 0.0 # tokens
config.data.max_output_len: 400.0 # tokens
config.data.min_output_input_ratio: 0.00
config.data.max_output_input_ratio: 100.0
config.data.manifest = config.data.dev_manifest
config.data.max_input_len = float('inf') # second
config.data.min_output_len = 0.0 # tokens
config.data.max_output_len = float('inf') # tokens
config.data.min_output_input_ratio = 0.00
config.data.max_output_input_ratio = float('inf')
dev_dataset = ManifestDataset.from_config(config)
collate_fn = SpeechCollator(keep_transcription_text=False)
......@@ -257,15 +257,15 @@ class U2Trainer(Trainer):
collate_fn=collate_fn)
# test dataset, return raw text
config.data.manifest = config.data.test_manifest
config.data.keep_transcription_text = True
config.data.augmentation_config = ""
config.data.min_input_len = 0.0 # second
config.data.max_input_len: 100.0 # second
config.data.min_output_len: 0.0 # tokens
config.data.max_output_len: 400.0 # tokens
config.data.min_output_input_ratio: 0.00
config.data.max_output_input_ratio: 100.0
config.data.manifest = config.data.test_manifest
config.data.max_input_len = float('inf') # second
config.data.min_output_len = 0.0 # tokens
config.data.max_output_len = float('inf') # tokens
config.data.min_output_input_ratio = 0.00
config.data.max_output_input_ratio = float('inf')
test_dataset = ManifestDataset.from_config(config)
# return text ord id
self.test_loader = DataLoader(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册