提交 56480e10 编写于 作者: H Hui Zhang

fix format

上级 7ec0ed4a
......@@ -28,4 +28,4 @@ Need set `decoding.decoding_chunk_size=16` when decoding.
| transformer | 31.95M | conf/transformer.yaml | spec_aug | test | attention | 3.858648955821991 | 0.057293 |
| transformer | 31.95M | conf/transformer.yaml | spec_aug | test | ctc_greedy_search | 3.858648955821991 | 0.061837 |
| transformer | 31.95M | conf/transformer.yaml | spec_aug | test | ctc_prefix_beam_search | 3.858648955821991 | 0.061685 |
| transformer | 31.95M | conf/transformer.yaml | spec_aug | test | attention_rescoring | 3.858648955821991 | 0.053844 |
\ No newline at end of file
| transformer | 31.95M | conf/transformer.yaml | spec_aug | test | attention_rescoring | 3.858648955821991 | 0.053844 |
......@@ -82,7 +82,7 @@ def create_manifest(data_dir, manifest_path_prefix):
# skip audio that has no transcription
if audio_id not in transcript_dict:
continue
utt2spk = Path(audio_path).parent.name
audio_data, samplerate = soundfile.read(audio_path)
duration = float(len(audio_data) / samplerate)
......
......@@ -73,7 +73,6 @@ def create_manifest(data_dir, manifest_path_prefix):
audio_data, samplerate = soundfile.read(audio_path)
duration = float(len(audio_data) / samplerate)
translation_str = " ".join(translation.split())
trancription_str = " ".join(trancription.split())
json_lines.append(
......@@ -82,7 +81,7 @@ def create_manifest(data_dir, manifest_path_prefix):
'utt': utt,
'feat': audio_path,
'feat_shape': (duration, ), # second
'text': [translation_str, trancription_str],
'text': [translation_str, trancription_str],
},
ensure_ascii=False))
......
......@@ -124,7 +124,7 @@ def create_manifest(data_dir, manifest_path_prefix):
json.dumps(
{
'utt': audio_id,
'utt2spk', spk,
'utt2spk': spk,
'feat': audio_path,
'feat_shape': (duration, ), # second
'text': word_text, # character
......
......@@ -22,9 +22,9 @@ import argparse
import codecs
import json
import os
from pathlib import Path
import soundfile
from pathlib import Path
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
......
......@@ -24,4 +24,4 @@
| transformer | 32.52 M | conf/transformer.yaml | spec_aug | test-clean | attention | 6.805267604192098 | 0.049795 |
| transformer | 32.52 M | conf/transformer.yaml | spec_aug | test-clean | ctc_greedy_search | 6.805267604192098 | 0.054892 |
| transformer | 32.52 M | conf/transformer.yaml | spec_aug | test-clean | ctc_prefix_beam_search | 6.805267604192098 | 0.054531 |
| transformer | 32.52 M | conf/transformer.yaml | spec_aug | test-clean | attention_rescoring | 6.805267604192098 | 0.042244 |
\ No newline at end of file
| transformer | 32.52 M | conf/transformer.yaml | spec_aug | test-clean | attention_rescoring | 6.805267604192098 | 0.042244 |
......@@ -4,4 +4,4 @@ asr model with phone unit
* asr0 - deepspeech2 Streaming/Non-Streaming
* asr1 - transformer/conformer Streaming/Non-Streaming
* asr2 - transformer/conformer Streaming/Non-Streaming with Kaldi feature
\ No newline at end of file
* asr2 - transformer/conformer Streaming/Non-Streaming with Kaldi feature
......@@ -55,4 +55,4 @@ As shown in the following table, we provide 3 training subsets, namely `S`, `M`
|-----------------|-------|--------------|-----------------------------------------------------------------------------------------|
| DEV | 20 | Internet | Specially designed for some speech tools which require cross-validation set in training |
| TEST\_NET | 23 | Internet | Match test |
| TEST\_MEETING | 15 | Real meeting | Mismatch test which is a far-field, conversational, spontaneous, and meeting dataset |
\ No newline at end of file
| TEST\_MEETING | 15 | Real meeting | Mismatch test which is a far-field, conversational, spontaneous, and meeting dataset |
......@@ -21,4 +21,4 @@ Pretrain model from http://mobvoi-speech-public.ufile.ucloud.cn/public/wenet/wen
| conformer | 32.52 M | conf/conformer.yaml | spec_aug | aishell1 | attention | - | 0.048456 |
| conformer | 32.52 M | conf/conformer.yaml | spec_aug | aishell1 | ctc_greedy_search | - | 0.052534 |
| conformer | 32.52 M | conf/conformer.yaml | spec_aug | aishell1 | ctc_prefix_beam_search | - | 0.052915 |
| conformer | 32.52 M | conf/conformer.yaml | spec_aug | aishell1 | attention_rescoring | - | 0.047904 |
\ No newline at end of file
| conformer | 32.52 M | conf/conformer.yaml | spec_aug | aishell1 | attention_rescoring | - | 0.047904 |
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2021 Xiaomi Corporation (Author: Yongqing Wang)
# Mobvoi Inc(Author: Di Wu, Binbin Zhang)
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
......@@ -12,11 +24,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
import argparse
import json
import os
import sys
def get_args():
......@@ -85,13 +96,13 @@ def meta_analysis(input_json, output_dir):
else:
utt2text.write(f'{sid}\t{text}\n')
segments.write(
f'{sid}\t{aid}\t{start_time}\t{end_time}\n'
)
f'{sid}\t{aid}\t{start_time}\t{end_time}\n')
utt2dur.write(f'{sid}\t{dur}\n')
segment_sub_names = " ".join(segment_subsets)
utt2subsets.write(
f'{sid}\t{segment_sub_names}\n')
def main():
args = get_args()
......@@ -99,4 +110,4 @@ def main():
if __name__ == '__main__':
main()
\ No newline at end of file
main()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2021 NPU, ASLP Group (Author: Qijie Shao)
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
......@@ -11,14 +23,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# process_opus.py: segmentation and downsampling of opus audio
# usage: python3 process_opus.py wav.scp segments output_wav.scp
import os
import sys
from pydub import AudioSegment
import sys
import os
def read_file(wav_scp, segments):
......@@ -86,4 +96,4 @@ def main():
if __name__ == '__main__':
main()
\ No newline at end of file
main()
......@@ -24,15 +24,10 @@ import jsonlines
import numpy as np
import paddle
from paddle import distributed as dist
from paddle.io import DataLoader
from yacs.config import CfgNode
from paddlespeech.s2t.frontend.featurizer import TextFeaturizer
from paddlespeech.s2t.io.collator import SpeechCollator
from paddlespeech.s2t.io.dataloader import BatchDataLoader
from paddlespeech.s2t.io.dataset import ManifestDataset
from paddlespeech.s2t.io.sampler import SortagradBatchSampler
from paddlespeech.s2t.io.sampler import SortagradDistributedBatchSampler
from paddlespeech.s2t.models.u2 import U2Model
from paddlespeech.s2t.training.optimizer import OptimizerFactory
from paddlespeech.s2t.training.reporter import ObsScope
......@@ -215,7 +210,7 @@ class U2Trainer(Trainer):
msg += f"{v:>.8f}" if isinstance(v,
float) else f"{v}"
msg += f" {k.split(',')[1]}" if len(
k.split(',')) == 2 else f""
k.split(',')) == 2 else ""
msg += ","
msg = msg[:-1] # remove the last ","
if (batch_index + 1
......
......@@ -57,7 +57,7 @@ class TextFeaturizer():
vocab_filepath, maskctc)
self.vocab_size = len(self.vocab_list)
else:
logger.warning(f"TextFeaturizer: not have vocab file.")
logger.warning("TextFeaturizer: not have vocab file.")
if unit_type == 'spm':
spm_model = spm_model_prefix + '.model'
......
......@@ -341,7 +341,7 @@ class LogMelSpectrogramKaldi():
self.eps = eps
self.remove_dc_offset = True
self.preemph = 0.97
self.dither = dither # only works in train mode
self.dither = dither # only works in train mode
def __repr__(self):
return (
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册