未验证 提交 8ad915a9 编写于 作者: H Hui Zhang 提交者: GitHub

Merge pull request #723 from PaddlePaddle/autolog

Fix autolog install; only create the autolog in test(), otherwise it will hang.
...@@ -16,3 +16,5 @@ tools/kenlm ...@@ -16,3 +16,5 @@ tools/kenlm
tools/sox-14.4.2 tools/sox-14.4.2
tools/soxbindings tools/soxbindings
tools/Montreal-Forced-Aligner/ tools/Montreal-Forced-Aligner/
*output/
...@@ -34,9 +34,8 @@ from deepspeech.training.trainer import Trainer ...@@ -34,9 +34,8 @@ from deepspeech.training.trainer import Trainer
from deepspeech.utils import error_rate from deepspeech.utils import error_rate
from deepspeech.utils import layer_tools from deepspeech.utils import layer_tools
from deepspeech.utils import mp_tools from deepspeech.utils import mp_tools
from deepspeech.utils.log import Log
from deepspeech.utils.log import Autolog from deepspeech.utils.log import Autolog
from deepspeech.utils.log import Log
logger = Log(__name__).getlog() logger = Log(__name__).getlog()
...@@ -226,8 +225,7 @@ class DeepSpeech2Tester(DeepSpeech2Trainer): ...@@ -226,8 +225,7 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
def __init__(self, config, args): def __init__(self, config, args):
super().__init__(config, args) super().__init__(config, args)
self.autolog = Autolog(batch_size = config.decoding.batch_size, model_name = "deepspeech2", model_precision = "fp32").getlog()
def ordid2token(self, texts, texts_len): def ordid2token(self, texts, texts_len):
""" ord() id to chr() chr """ """ ord() id to chr() chr """
trans = [] trans = []
...@@ -294,6 +292,10 @@ class DeepSpeech2Tester(DeepSpeech2Trainer): ...@@ -294,6 +292,10 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
@paddle.no_grad() @paddle.no_grad()
def test(self): def test(self):
logger.info(f"Test Total Examples: {len(self.test_loader.dataset)}") logger.info(f"Test Total Examples: {len(self.test_loader.dataset)}")
self.autolog = Autolog(
batch_size=self.config.decoding.batch_size,
model_name="deepspeech2",
model_precision="fp32").getlog()
self.model.eval() self.model.eval()
cfg = self.config cfg = self.config
error_rate_type = None error_rate_type = None
......
...@@ -18,11 +18,8 @@ import socket ...@@ -18,11 +18,8 @@ import socket
import sys import sys
import auto_log import auto_log
import os
from paddle import inference from paddle import inference
FORMAT_STR = '[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s' FORMAT_STR = '[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
DATE_FMT_STR = '%Y/%m/%d %H:%M:%S' DATE_FMT_STR = '%Y/%m/%d %H:%M:%S'
...@@ -153,28 +150,29 @@ class Log(): ...@@ -153,28 +150,29 @@ class Log():
def getlog(self): def getlog(self):
return self.logger return self.logger
class Autolog:
def __init__(self, batch_size, model_name = "DeepSpeech", model_precision = "fp32"): class Autolog:
def __init__(self,
batch_size,
model_name="DeepSpeech",
model_precision="fp32"):
pid = os.getpid() pid = os.getpid()
gpu_id = int(os.environ['CUDA_VISIBLE_DEVICES'].split(',')[0]) gpu_id = int(os.environ['CUDA_VISIBLE_DEVICES'].split(',')[0])
infer_config = inference.Config() infer_config = inference.Config()
infer_config.enable_use_gpu(100, gpu_id) infer_config.enable_use_gpu(100, gpu_id)
autolog = auto_log.AutoLogger( autolog = auto_log.AutoLogger(
model_name = model_name, model_name=model_name,
model_precision = model_precision, model_precision=model_precision,
batch_size = batch_size, batch_size=batch_size,
data_shape="dynamic", data_shape="dynamic",
save_path="./output/auto_log.lpg", save_path="./output/auto_log.lpg",
inference_config = infer_config, inference_config=infer_config,
pids = pid, pids=pid,
process_name = None, process_name=None,
gpu_ids = gpu_id, gpu_ids=gpu_id,
time_keys=[ time_keys=['preprocess_time', 'inference_time', 'postprocess_time'],
'preprocess_time', 'inference_time', 'postprocess_time'
],
warmup=0) warmup=0)
self.autolog = autolog self.autolog = autolog
def getlog(self): def getlog(self):
return self.autolog return self.autolog
...@@ -50,36 +50,36 @@ def create_manifest(data_dir, manifest_path_prefix): ...@@ -50,36 +50,36 @@ def create_manifest(data_dir, manifest_path_prefix):
total_text = 0.0 total_text = 0.0
total_num = 0 total_num = 0
phn_path = os.path.join(data_dir, dtype+'.text') phn_path = os.path.join(data_dir, dtype + '.text')
phn_dict = {} phn_dict = {}
for line in codecs.open(phn_path, 'r', 'utf-8'): for line in codecs.open(phn_path, 'r', 'utf-8'):
line = line.strip() line = line.strip()
if line == '': if line == '':
continue continue
audio_id, text = line.split(' ', 1) audio_id, text = line.split(' ', 1)
phn_dict[audio_id] = text phn_dict[audio_id] = text
audio_dir = os.path.join(data_dir, dtype+'_sph.scp') audio_dir = os.path.join(data_dir, dtype + '_sph.scp')
for line in codecs.open(audio_dir, 'r', 'utf-8'): for line in codecs.open(audio_dir, 'r', 'utf-8'):
audio_id, audio_path = line.strip().split() audio_id, audio_path = line.strip().split()
# if no transcription for audio then raise error # if no transcription for audio then raise error
assert audio_id in phn_dict assert audio_id in phn_dict
audio_data, samplerate = soundfile.read(audio_path) audio_data, samplerate = soundfile.read(audio_path)
duration = float(len(audio_data) / samplerate) duration = float(len(audio_data) / samplerate)
text = phn_dict[audio_id] text = phn_dict[audio_id]
json_lines.append( json_lines.append(
json.dumps( json.dumps(
{ {
'utt': audio_id, 'utt': audio_id,
'feat': audio_path, 'feat': audio_path,
'feat_shape': (duration, ), # second 'feat_shape': (duration, ), # second
'text': text 'text': text
}, },
ensure_ascii=False)) ensure_ascii=False))
total_sec += duration total_sec += duration
total_text += len(text) total_text += len(text)
total_num += 1 total_num += 1
manifest_path = manifest_path_prefix + '.' + dtype + '.raw' manifest_path = manifest_path_prefix + '.' + dtype + '.raw'
with codecs.open(manifest_path, 'w', 'utf-8') as fout: with codecs.open(manifest_path, 'w', 'utf-8') as fout:
...@@ -99,9 +99,7 @@ def main(): ...@@ -99,9 +99,7 @@ def main():
if args.src_dir.startswith('~'): if args.src_dir.startswith('~'):
args.src_dir = os.path.expanduser(args.src_dir) args.src_dir = os.path.expanduser(args.src_dir)
prepare_dataset( prepare_dataset(src_dir=args.src_dir, manifest_path=args.manifest_prefix)
src_dir=args.src_dir,
manifest_path=args.manifest_prefix)
print("manifest prepare done!") print("manifest prepare done!")
......
# TIMIT # TIMIT
Results will be organized and updated soon. Results will be organized and updated soon.
\ No newline at end of file
...@@ -44,18 +44,17 @@ if [ $? != 0 ]; then ...@@ -44,18 +44,17 @@ if [ $? != 0 ]; then
fi fi
#install auto-log #install auto-log
python3 -c "import auto_log" python -c "import auto_log"
if [ $? != 0 ]; then if [ $? != 0 ]; then
info_msg "Install auto_log into default system path" info_msg "Install auto_log into default system path"
git clone https://github.com/LDOUBLEV/AutoLog test -d AutoLog || git clone https://github.com/LDOUBLEV/AutoLog
if [ $? != 0 ]; then if [ $? != 0 ]; then
error_msg "Download auto_log failed !!!" error_msg "Download auto_log failed !!!"
exit 1 exit 1
fi fi
cd AutoLog cd AutoLog
pip3 install -r requirements.txt pip install -r requirements.txt
python3 setup.py bdist_wheel python setup.py install
pip3 install ./dist/[Aa]uto*.whl
cd .. cd ..
rm -rf AutoLog rm -rf AutoLog
fi fi
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册