From caa8eb4d0df409d1be316aa92cfe8533dd1800e7 Mon Sep 17 00:00:00 2001 From: KP <109694228@qq.com> Date: Sun, 24 Apr 2022 23:50:44 +0800 Subject: [PATCH] Add KWS example. --- examples/hey_snips/kws0/local/train.sh | 1 + examples/hey_snips/kws0/run.sh | 5 +++++ paddlespeech/kws/exps/mdtc/compute_det.py | 21 ++++++++++++--------- paddlespeech/kws/models/loss.py | 19 ++++++++++--------- 4 files changed, 28 insertions(+), 18 deletions(-) diff --git a/examples/hey_snips/kws0/local/train.sh b/examples/hey_snips/kws0/local/train.sh index cab547b8..8d0181b8 100755 --- a/examples/hey_snips/kws0/local/train.sh +++ b/examples/hey_snips/kws0/local/train.sh @@ -7,6 +7,7 @@ if [ ${ngpu} -gt 0 ]; then python3 -m paddle.distributed.launch --gpus $CUDA_VISIBLE_DEVICES ${BIN_DIR}/train.py \ --cfg_path ${cfg_path} else + echo "set CUDA_VISIBLE_DEVICES to enable multi-gpus trainning." python3 ${BIN_DIR}/train.py \ --cfg_path ${cfg_path} fi diff --git a/examples/hey_snips/kws0/run.sh b/examples/hey_snips/kws0/run.sh index d6d1d878..2cc09a4f 100755 --- a/examples/hey_snips/kws0/run.sh +++ b/examples/hey_snips/kws0/run.sh @@ -18,6 +18,11 @@ source path.sh ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') +if [ $# != 1 ];then + echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path" + exit -1 +fi + stage=1 stop_stage=3 diff --git a/paddlespeech/kws/exps/mdtc/compute_det.py b/paddlespeech/kws/exps/mdtc/compute_det.py index 91b02ff6..817846b8 100644 --- a/paddlespeech/kws/exps/mdtc/compute_det.py +++ b/paddlespeech/kws/exps/mdtc/compute_det.py @@ -15,6 +15,7 @@ import argparse import os +import paddle import yaml from tqdm import tqdm @@ -23,32 +24,34 @@ from paddlespeech.s2t.utils.dynamic_import import dynamic_import # yapf: disable parser = argparse.ArgumentParser(__doc__) parser.add_argument("--cfg_path", type=str, required=True) -parser.add_argument('--keyword', type=int, default=0, help='keyword label') -parser.add_argument('--step', type=float, default=0.01, help='threshold step') +parser.add_argument('--keyword_index', type=int, default=0, help='keyword index') +parser.add_argument('--step', type=float, default=0.01, help='threshold step of trigger score') parser.add_argument('--window_shift', type=int, default=50, help='window_shift is used to skip the frames after triggered') args = parser.parse_args() # yapf: enable -def load_label_and_score(keyword, ds, score_file): - score_table = {} +def load_label_and_score(keyword_index: int, + ds: paddle.io.Dataset, + score_file: os.PathLike): + score_table = {} # {utt_id: scores_over_frames} with open(score_file, 'r', encoding='utf8') as fin: for line in fin: arr = line.strip().split() key = arr[0] current_keyword = arr[1] str_list = arr[2:] - if int(current_keyword) == keyword: + if int(current_keyword) == keyword_index: scores = list(map(float, str_list)) if key not in score_table: score_table.update({key: scores}) - keyword_table = {} - filler_table = {} + keyword_table = {} # scores of keyword utt_id + filler_table = {} # scores of non-keyword utt_id filler_duration = 0.0 for key, index, duration in zip(ds.keys, ds.labels, ds.durations): assert key in score_table - if index == keyword: + if index == keyword_index: keyword_table[key] = score_table[key] else: filler_table[key] = score_table[key] @@ -78,7 +81,7 @@ if __name__ == '__main__': print('Filler total duration Hours: {}'.format(filler_duration / 3600.0)) pbar = tqdm(total=int(1.0 / args.step)) with open(stats_file, 'w', encoding='utf8') as fout: - keyword_index = args.keyword + keyword_index = args.keyword_index threshold = 0.0 while threshold <= 1.0: num_false_reject = 0 diff --git a/paddlespeech/kws/models/loss.py b/paddlespeech/kws/models/loss.py index 8a2e9e74..64c9a32c 100644 --- a/paddlespeech/kws/models/loss.py +++ b/paddlespeech/kws/models/loss.py @@ -15,7 +15,16 @@ import paddle -def fill_mask_elements(condition, value, x): +def padding_mask(lengths: paddle.Tensor) -> paddle.Tensor: + batch_size = lengths.shape[0] + max_len = int(lengths.max().item()) + seq = paddle.arange(max_len, dtype=paddle.int64) + seq = seq.expand((batch_size, max_len)) + return seq >= lengths.unsqueeze(1) + + +def fill_mask_elements(condition: paddle.Tensor, value: float, + x: paddle.Tensor) -> paddle.Tensor: assert condition.shape == x.shape values = paddle.ones_like(x, dtype=x.dtype) * value return paddle.where(condition, values, x) @@ -70,11 +79,3 @@ def max_pooling_loss(logits: paddle.Tensor, acc = num_correct / num_utts # acc = 0.0 return loss, num_correct, acc - - -def padding_mask(lengths: paddle.Tensor) -> paddle.Tensor: - batch_size = lengths.shape[0] - max_len = int(lengths.max().item()) - seq = paddle.arange(max_len, dtype=paddle.int64) - seq = seq.expand((batch_size, max_len)) - return seq >= lengths.unsqueeze(1) -- GitLab