#!/usr/bin/env bash
#
# Prepare the "tiny" example data set:
#   1. run data/librispeech/librispeech.py to download data and generate
#      manifests under the prefix data/tiny/manifest,
#   2. keep the first 64 lines of the dev-clean manifest as manifest.tiny,
#   3. build the vocabulary (tools/build_vocab.py),
#   4. compute mean and stddev for the normalizer (tools/compute_mean_std.py).
#
# All paths below are relative to the directory two levels above the
# current working directory (the one containing data/ and tools/), so the
# script must be started from two levels below it.
#
# Exit status: 0 on success, 1 as soon as any step fails.

# Print the given message to stderr and terminate with status 1.
die() {
    echo "$*" >&2
    exit 1
}

# Every later path is relative, so a failed directory change must abort
# instead of letting the tools run against the wrong tree.
pushd ../.. > /dev/null || die "Change directory to ../.. failed. Terminated."


# download data, generate manifests
# NOTE(review): '~' is single-quoted and therefore passed literally;
# presumably librispeech.py expands it itself -- confirm.
python data/librispeech/librispeech.py \
    --manifest_prefix='data/tiny/manifest' \
    --target_dir='~/.cache/paddle/dataset/speech/libri' \
    --full_download='False' \
    || die "Prepare LibriSpeech failed. Terminated."

# The tiny data set is the first 64 entries of the dev-clean manifest.
head -n 64 data/tiny/manifest.dev-clean > data/tiny/manifest.tiny \
    || die "Create tiny manifest failed. Terminated."


# build vocabulary
# Reads the same dev-clean manifest the tiny subset is cut from; no step in
# this script produces a plain 'manifest.dev' file.
# NOTE(review): presumably manifest.dev-clean is written by librispeech.py
# for the prefix above -- confirm against that script.
python tools/build_vocab.py \
    --count_threshold=0 \
    --vocab_path='data/tiny/vocab.txt' \
    --manifest_paths='data/tiny/manifest.dev-clean' \
    || die "Build vocabulary failed. Terminated."


# compute mean and stddev for normalizer
# num_samples matches the 64 lines kept in manifest.tiny.
python tools/compute_mean_std.py \
    --manifest_path='data/tiny/manifest.tiny' \
    --num_samples=64 \
    --specgram_type='linear' \
    --output_path='data/tiny/mean_std.npz' \
    || die "Compute mean and stddev failed. Terminated."


echo "Tiny data preparation done."
exit 0