“8a2cf0fbffbe1becd626d6ec3f31740540807b20”上不存在“paddle/fluid/framework/dim_test.cu”
提交 3a2c722d 编写于 作者: H Hui Zhang

fix dev & test dataset filter

上级 b69021f9
......@@ -144,6 +144,12 @@ class DeepSpeech2Trainer(Trainer):
# Build the dev dataset from the dev manifest with its own filter limits.
config.data.manifest = config.data.dev_manifest
# Empty augmentation config for dev data (presumably disables augmentation
# -- confirm against ManifestDataset.from_config).
config.data.augmentation_config = ""
# NOTE: these must be real assignments. The previous `name: value` form is a
# bare annotation statement in Python and never stored the value, so the
# limits below were silently left at whatever the config already held.
config.data.min_input_len = 0.0  # second
config.data.max_input_len = 100.0  # second
config.data.min_output_len = 0.0  # tokens
config.data.max_output_len = 400.0  # tokens
config.data.min_output_input_ratio = 0.00
config.data.max_output_input_ratio = 100.0
dev_dataset = ManifestDataset.from_config(config)
if self.parallel:
......@@ -320,9 +326,15 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
# Make the config writable before overriding the data section for testing.
config.defrost()
# return raw text
config.data.manifest = config.data.test_manifest
config.data.keep_transcription_text = True
# Empty augmentation config for test data (presumably disables augmentation
# -- confirm against ManifestDataset.from_config).
config.data.augmentation_config = ""
# NOTE: these must be real assignments. The previous `name: value` form is a
# bare annotation statement in Python and never stored the value, so the
# limits below were silently left at whatever the config already held.
config.data.min_input_len = 0.0  # second
config.data.max_input_len = 100.0  # second
config.data.min_output_len = 0.0  # tokens
config.data.max_output_len = 400.0  # tokens
config.data.min_output_input_ratio = 0.00
config.data.max_output_input_ratio = 100.0
test_dataset = ManifestDataset.from_config(config)
# return text ord id
......
......@@ -215,8 +215,14 @@ class U2Trainer(Trainer):
# Train dataset: built from the train manifest with the config as given.
config.data.manifest = config.data.train_manifest
train_dataset = ManifestDataset.from_config(config)

# Dev dataset: reuse the same config object, overriding the data section.
config.data.manifest = config.data.dev_manifest
# Empty augmentation config for dev data (presumably disables augmentation
# -- confirm against ManifestDataset.from_config).
config.data.augmentation_config = ""
# NOTE: these must be real assignments. The previous `name: value` form is a
# bare annotation statement in Python and never stored the value, so the
# limits below were silently left at the values used for the train dataset.
config.data.min_input_len = 0.0  # second
config.data.max_input_len = 100.0  # second
config.data.min_output_len = 0.0  # tokens
config.data.max_output_len = 400.0  # tokens
config.data.min_output_input_ratio = 0.00
config.data.max_output_input_ratio = 100.0
dev_dataset = ManifestDataset.from_config(config)
collate_fn = SpeechCollator(keep_transcription_text=False)
......@@ -253,6 +259,12 @@ class U2Trainer(Trainer):
# test dataset, return raw text
config.data.keep_transcription_text = True
# Empty augmentation config for test data (presumably disables augmentation
# -- confirm against ManifestDataset.from_config).
config.data.augmentation_config = ""
# NOTE: these must be real assignments. The previous `name: value` form is a
# bare annotation statement in Python and never stored the value, so the
# limits below were silently left at whatever the config already held.
config.data.min_input_len = 0.0  # second
config.data.max_input_len = 100.0  # second
config.data.min_output_len = 0.0  # tokens
config.data.max_output_len = 400.0  # tokens
config.data.min_output_input_ratio = 0.00
config.data.max_output_input_ratio = 100.0
config.data.manifest = config.data.test_manifest
test_dataset = ManifestDataset.from_config(config)
# return text ord id
......
......@@ -27,6 +27,7 @@ if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
mv data/manifest.${set} data/manifest.${set}.raw
done
# Remove any previously aggregated raw manifests first: the loop below appends
# with `>>`, so a leftover manifest.train.raw would otherwise accumulate
# duplicate entries on a re-run.
rm -rf data/manifest.train.raw data/manifest.dev.raw data/manifest.test.raw
# Concatenate the three per-subset training manifests into manifest.train.raw.
for set in train-clean-100 train-clean-360 train-other-500; do
cat data/manifest.${set}.raw >> data/manifest.train.raw
done
......
......@@ -31,6 +31,7 @@ if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
mv data/manifest.${set} data/manifest.${set}.raw
done
# Remove any previously aggregated raw manifests first: the loop below appends
# with `>>`, so a leftover manifest.train.raw would otherwise accumulate
# duplicate entries on a re-run.
rm -rf data/manifest.train.raw data/manifest.dev.raw data/manifest.test.raw
# Concatenate the three per-subset training manifests into manifest.train.raw.
for set in train-clean-100 train-clean-360 train-other-500; do
cat data/manifest.${set}.raw >> data/manifest.train.raw
done
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册