未验证 提交 8ad915a9 编写于 作者: H Hui Zhang 提交者: GitHub

Merge pull request #723 from PaddlePaddle/autolog

Fix the auto_log installation; create the Autolog only in test(), otherwise the run hangs.
......@@ -16,3 +16,5 @@ tools/kenlm
tools/sox-14.4.2
tools/soxbindings
tools/Montreal-Forced-Aligner/
*output/
......@@ -34,9 +34,8 @@ from deepspeech.training.trainer import Trainer
from deepspeech.utils import error_rate
from deepspeech.utils import layer_tools
from deepspeech.utils import mp_tools
from deepspeech.utils.log import Log
from deepspeech.utils.log import Autolog
from deepspeech.utils.log import Log
logger = Log(__name__).getlog()
......@@ -226,8 +225,7 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
def __init__(self, config, args):
    """Initialise the tester by delegating to the parent trainer.

    The benchmark logger is intentionally not built here. ``test()``
    creates its own ``Autolog`` right before evaluation, and the change
    description for this code states that creating it anywhere other than
    in the test path makes the run hang.

    Args:
        config: Configuration object, forwarded unchanged to the parent.
        args: Parsed command-line arguments, forwarded unchanged.
    """
    super().__init__(config, args)
    # Placeholder so the attribute always exists; test() assigns the real
    # logger.
    self.autolog = None
def ordid2token(self, texts, texts_len):
""" ord() id to chr() chr """
trans = []
......@@ -294,6 +292,10 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
@paddle.no_grad()
def test(self):
logger.info(f"Test Total Examples: {len(self.test_loader.dataset)}")
self.autolog = Autolog(
batch_size=self.config.decoding.batch_size,
model_name="deepspeech2",
model_precision="fp32").getlog()
self.model.eval()
cfg = self.config
error_rate_type = None
......
......@@ -18,11 +18,8 @@ import socket
import sys
import auto_log
import os
from paddle import inference
FORMAT_STR = '[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
DATE_FMT_STR = '%Y/%m/%d %H:%M:%S'
......@@ -153,28 +150,29 @@ class Log():
def getlog(self):
    """Return the logger object stored on this instance."""
    configured_logger = self.logger
    return configured_logger
class Autolog:
    """Build and hold an ``auto_log.AutoLogger`` for benchmark logging.

    Args:
        batch_size: Batch size forwarded to the logger.
        model_name: Model name forwarded to the logger.
        model_precision: Precision label forwarded to the logger.

    Raises:
        ValueError: If the first entry of ``CUDA_VISIBLE_DEVICES`` is set
            but is not an integer.
    """

    def __init__(self,
                 batch_size,
                 model_name="DeepSpeech",
                 model_precision="fp32"):
        pid = os.getpid()
        # Use the first entry of CUDA_VISIBLE_DEVICES. Fall back to device 0
        # when the variable is unset or empty instead of raising KeyError.
        visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES', '')
        first_device = visible_devices.split(',')[0].strip()
        gpu_id = int(first_device) if first_device else 0

        infer_config = inference.Config()
        infer_config.enable_use_gpu(100, gpu_id)

        # NOTE(review): the '.lpg' extension in save_path looks like a typo
        # for '.log'; kept as-is because existing tooling may read this
        # path. Confirm before changing.
        self.autolog = auto_log.AutoLogger(
            model_name=model_name,
            model_precision=model_precision,
            batch_size=batch_size,
            data_shape="dynamic",
            save_path="./output/auto_log.lpg",
            inference_config=infer_config,
            pids=pid,
            process_name=None,
            gpu_ids=gpu_id,
            time_keys=['preprocess_time', 'inference_time', 'postprocess_time'],
            warmup=0)

    def getlog(self):
        """Return the underlying ``AutoLogger`` instance."""
        return self.autolog
......@@ -50,36 +50,36 @@ def create_manifest(data_dir, manifest_path_prefix):
total_text = 0.0
total_num = 0
phn_path = os.path.join(data_dir, dtype+'.text')
phn_path = os.path.join(data_dir, dtype + '.text')
phn_dict = {}
for line in codecs.open(phn_path, 'r', 'utf-8'):
line = line.strip()
if line == '':
continue
audio_id, text = line.split(' ', 1)
phn_dict[audio_id] = text
phn_dict[audio_id] = text
audio_dir = os.path.join(data_dir, dtype+'_sph.scp')
audio_dir = os.path.join(data_dir, dtype + '_sph.scp')
for line in codecs.open(audio_dir, 'r', 'utf-8'):
audio_id, audio_path = line.strip().split()
# if no transcription for audio then raise error
assert audio_id in phn_dict
audio_data, samplerate = soundfile.read(audio_path)
duration = float(len(audio_data) / samplerate)
text = phn_dict[audio_id]
json_lines.append(
json.dumps(
{
'utt': audio_id,
'feat': audio_path,
'feat_shape': (duration, ), # second
'text': text
},
ensure_ascii=False))
total_sec += duration
total_text += len(text)
total_num += 1
audio_id, audio_path = line.strip().split()
# if no transcription for audio then raise error
assert audio_id in phn_dict
audio_data, samplerate = soundfile.read(audio_path)
duration = float(len(audio_data) / samplerate)
text = phn_dict[audio_id]
json_lines.append(
json.dumps(
{
'utt': audio_id,
'feat': audio_path,
'feat_shape': (duration, ), # second
'text': text
},
ensure_ascii=False))
total_sec += duration
total_text += len(text)
total_num += 1
manifest_path = manifest_path_prefix + '.' + dtype + '.raw'
with codecs.open(manifest_path, 'w', 'utf-8') as fout:
......@@ -99,9 +99,7 @@ def main():
if args.src_dir.startswith('~'):
args.src_dir = os.path.expanduser(args.src_dir)
prepare_dataset(
src_dir=args.src_dir,
manifest_path=args.manifest_prefix)
prepare_dataset(src_dir=args.src_dir, manifest_path=args.manifest_prefix)
print("manifest prepare done!")
......
# TIMIT
Results will be organized and updated soon.
\ No newline at end of file
Results will be organized and updated soon.
......@@ -44,18 +44,17 @@ if [ $? != 0 ]; then
fi
# Install auto_log unless it is already importable by the active interpreter.
python -c "import auto_log"
if [ $? != 0 ]; then
    info_msg "Install auto_log into default system path"
    # Reuse an existing checkout; clone only when the directory is missing.
    test -d AutoLog || git clone https://github.com/LDOUBLEV/AutoLog
    if [ $? != 0 ]; then
        error_msg "Download auto_log failed !!!"
        exit 1
    fi
    # Stop if the checkout cannot be entered; otherwise the commands below
    # would run in the wrong directory and `cd ..` would leave it.
    cd AutoLog || exit 1
    pip install -r requirements.txt
    python setup.py install
    cd ..
    rm -rf AutoLog
fi
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册