diff --git a/deepspeech/exps/u2/model.py b/deepspeech/exps/u2/model.py index 8837444d68d344f5eed0efd93ec64d392e2c4dad..334d6bc8e94a47d3fe4ba644f24965df3ea45579 100644 --- a/deepspeech/exps/u2/model.py +++ b/deepspeech/exps/u2/model.py @@ -368,7 +368,7 @@ class U2Tester(U2Trainer): trans.append(''.join([chr(i) for i in ids])) return trans - def compute_metrics(self, utts, audio, audio_len, texts, texts_len, fout=None, fref=None): + def compute_metrics(self, utts, audio, audio_len, texts, texts_len, fout=None): cfg = self.config.decoding errors_sum, len_refs, num_ins = 0.0, 0, 0 errors_func = error_rate.char_errors if cfg.error_rate_type == 'cer' else error_rate.word_errors @@ -402,8 +402,6 @@ class U2Tester(U2Trainer): num_ins += 1 if fout: fout.write(utt + " " + result + "\n") - if fref: - fref.write(utt + " " + target + "\n") logger.info("\nTarget Transcription: %s\nOutput Transcription: %s" % (target, result)) logger.info("One example error rate [%s] = %f" % @@ -432,7 +430,6 @@ class U2Tester(U2Trainer): num_time = 0.0 with open(self.args.result_file, 'w') as fout: for i, batch in enumerate(self.test_loader): - # utt, audio, audio_len, text, text_len = batch metrics = self.compute_metrics(*batch, fout=fout) num_frames += metrics['num_frames'] num_time += metrics["decode_time"] diff --git a/deepspeech/io/dataset.py b/deepspeech/io/dataset.py index eaa57a4ec4b120272731bf8aa4ab6141a47fa522..1cf3827d344ad57bfa18d0b3ce227cc5b2f6e6f8 100644 --- a/deepspeech/io/dataset.py +++ b/deepspeech/io/dataset.py @@ -284,7 +284,7 @@ class ManifestDataset(Dataset): return self._local_data.tar2object[tarpath].extractfile( self._local_data.tar2info[tarpath][filename]) - def process_utterance(self, utt, audio_file, transcript): + def process_utterance(self, audio_file, transcript): """Load, augment, featurize and normalize for speech data. :param audio_file: Filepath or file object of audio file. 
@@ -323,7 +323,7 @@ class ManifestDataset(Dataset): specgram = self._augmentation_pipeline.transform_feature(specgram) feature_aug_time = time.time() - start_time #logger.debug(f"audio feature augmentation time: {feature_aug_time}") - return utt, specgram, transcript_part + return specgram, transcript_part def _instance_reader_creator(self, manifest): """ @@ -336,9 +336,7 @@ class ManifestDataset(Dataset): def reader(): for instance in manifest: - # inst = self.process_utterance(instance["feat"], - # instance["text"]) - inst = self.process_utterance(instance["utt"], instance["feat"], + inst = self.process_utterance(instance["feat"], instance["text"]) yield inst @@ -349,6 +347,6 @@ class ManifestDataset(Dataset): def __getitem__(self, idx): instance = self._manifest[idx] - return self.process_utterance(instance["utt"], instance["feat"], + feat, text = self.process_utterance(instance["feat"], instance["text"]) - # return self.process_utterance(instance["feat"], instance["text"]) + return instance["utt"], feat, text diff --git a/examples/tiny/s1/conf/transformer.yaml b/examples/tiny/s1/conf/transformer.yaml index dd3e026772aaf5b742b079d191b43eda008a7667..0a7cf3be845b68a87799904f7fdf167813fb1794 100644 --- a/examples/tiny/s1/conf/transformer.yaml +++ b/examples/tiny/s1/conf/transformer.yaml @@ -8,7 +8,7 @@ data: spm_model_prefix: 'data/bpe_unigram_200' mean_std_filepath: "" augmentation_config: conf/augmentation.json - batch_size: 2 #4 + batch_size: 4 min_input_len: 0.5 # second max_input_len: 20.0 # second min_output_len: 0.0 # tokens @@ -31,7 +31,7 @@ data: keep_transcription_text: False sortagrad: True shuffle_method: batch_shuffle - num_workers: 0 #2 + num_workers: 2 # network architecture diff --git a/examples/tiny/s1/run.sh b/examples/tiny/s1/run.sh index fdcf7ff017e63070ea0f7d8728c8677644fd6eff..b148869b7d6aaecc7f9181818be315846ee11012 100755 --- a/examples/tiny/s1/run.sh +++ b/examples/tiny/s1/run.sh @@ -30,12 +30,10 @@ fi if [ ${stage} -le 3 ] && [ 
${stop_stage} -ge 3 ]; then # test ckpt avg_n - # CUDA_VISIBLE_DEVICES=7 - ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1 + CUDA_VISIBLE_DEVICES=7 ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1 fi if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then # export ckpt avg_n - # CUDA_VISIBLE_DEVICES= - ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit + CUDA_VISIBLE_DEVICES= ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit fi