Merge pull request #723 from PaddlePaddle/autolog

Fix autolog install; create the autolog only in test(), otherwise it will hang.

Merge pull request #723 from PaddlePaddle/autolog
Fix autolog install; create the autolog only in test(), otherwise it will hang.
8ad915a9 · Hui Zhang · GitHub · 601938e9 · b076d3e9 · 8ad915a9
6 changed files
--- a/.gitignore
+++ b/.gitignore
@@ -16,3 +16,5 @@ tools/kenlm
 tools/sox-14.4.2
 tools/soxbindings
 tools/Montreal-Forced-Aligner/
+
+*output/
--- a/deepspeech/exps/deepspeech2/model.py
+++ b/deepspeech/exps/deepspeech2/model.py
@@ -34,9 +34,8 @@ from deepspeech.training.trainer import Trainer
 from deepspeech.utils import error_rate
 from deepspeech.utils import layer_tools
 from deepspeech.utils import mp_tools
-from deepspeech.utils.log import Log
 from deepspeech.utils.log import Autolog
-
+from deepspeech.utils.log import Log

 logger = Log(__name__).getlog()

@@ -226,8 +225,7 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):

    def __init__(self, config, args):
        super().__init__(config, args)
-        self.autolog = Autolog(batch_size = config.decoding.batch_size, model_name = "deepspeech2", model_precision = "fp32").getlog()
-    
+
    def ordid2token(self, texts, texts_len):
        """ ord() id to chr() chr """
        trans = []
@@ -294,6 +292,10 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
    @paddle.no_grad()
    def test(self):
        logger.info(f"Test Total Examples: {len(self.test_loader.dataset)}")
+        self.autolog = Autolog(
+            batch_size=self.config.decoding.batch_size,
+            model_name="deepspeech2",
+            model_precision="fp32").getlog()
        self.model.eval()
        cfg = self.config
        error_rate_type = None

--- a/deepspeech/utils/log.py
+++ b/deepspeech/utils/log.py
@@ -18,11 +18,8 @@ import socket
 import sys

 import auto_log
-import os
 from paddle import inference

-
-
 FORMAT_STR = '[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
 DATE_FMT_STR = '%Y/%m/%d %H:%M:%S'

@@ -153,28 +150,29 @@ class Log():
    def getlog(self):
        return self.logger

-class Autolog:

-    def __init__(self, batch_size, model_name = "DeepSpeech", model_precision = "fp32"):    
+class Autolog:
+    def __init__(self,
+                 batch_size,
+                 model_name="DeepSpeech",
+                 model_precision="fp32"):
        pid = os.getpid()
        gpu_id = int(os.environ['CUDA_VISIBLE_DEVICES'].split(',')[0])
        infer_config = inference.Config()
        infer_config.enable_use_gpu(100, gpu_id)
        autolog = auto_log.AutoLogger(
-            model_name = model_name,
-            model_precision = model_precision,
-            batch_size = batch_size,
+            model_name=model_name,
+            model_precision=model_precision,
+            batch_size=batch_size,
            data_shape="dynamic",
            save_path="./output/auto_log.lpg",
-            inference_config = infer_config,
-            pids = pid,
-            process_name = None,
-            gpu_ids = gpu_id,
-            time_keys=[
-                    'preprocess_time', 'inference_time', 'postprocess_time'
-            ],
+            inference_config=infer_config,
+            pids=pid,
+            process_name=None,
+            gpu_ids=gpu_id,
+            time_keys=['preprocess_time', 'inference_time', 'postprocess_time'],
            warmup=0)
        self.autolog = autolog
-    
+
    def getlog(self):
        return self.autolog
--- a/examples/dataset/timit/timit_kaldi_standard_split.py
+++ b/examples/dataset/timit/timit_kaldi_standard_split.py
@@ -50,36 +50,36 @@ def create_manifest(data_dir, manifest_path_prefix):
        total_text = 0.0
        total_num = 0

-        phn_path = os.path.join(data_dir, dtype+'.text')
+        phn_path = os.path.join(data_dir, dtype + '.text')
        phn_dict = {}
        for line in codecs.open(phn_path, 'r', 'utf-8'):
            line = line.strip()
            if line == '':
                continue
            audio_id, text = line.split(' ', 1)
-            phn_dict[audio_id] = text        
+            phn_dict[audio_id] = text

-        audio_dir = os.path.join(data_dir, dtype+'_sph.scp')
+        audio_dir = os.path.join(data_dir, dtype + '_sph.scp')
        for line in codecs.open(audio_dir, 'r', 'utf-8'):
-                audio_id, audio_path = line.strip().split()
-                # if no transcription for audio then raise error
-                assert audio_id in phn_dict
-                audio_data, samplerate = soundfile.read(audio_path)
-                duration = float(len(audio_data) / samplerate)
-                text = phn_dict[audio_id]
-                json_lines.append(
-                    json.dumps(
-                        {
-                            'utt': audio_id,
-                            'feat': audio_path,
-                            'feat_shape': (duration, ),  # second
-                            'text': text
-                        },
-                        ensure_ascii=False))
-
-                total_sec += duration
-                total_text += len(text)
-                total_num += 1
+            audio_id, audio_path = line.strip().split()
+            # if no transcription for audio then raise error
+            assert audio_id in phn_dict
+            audio_data, samplerate = soundfile.read(audio_path)
+            duration = float(len(audio_data) / samplerate)
+            text = phn_dict[audio_id]
+            json_lines.append(
+                json.dumps(
+                    {
+                        'utt': audio_id,
+                        'feat': audio_path,
+                        'feat_shape': (duration, ),  # second
+                        'text': text
+                    },
+                    ensure_ascii=False))
+
+            total_sec += duration
+            total_text += len(text)
+            total_num += 1

        manifest_path = manifest_path_prefix + '.' + dtype + '.raw'
        with codecs.open(manifest_path, 'w', 'utf-8') as fout:
@@ -99,9 +99,7 @@ def main():
    if args.src_dir.startswith('~'):
        args.src_dir = os.path.expanduser(args.src_dir)

-    prepare_dataset(
-        src_dir=args.src_dir,
-        manifest_path=args.manifest_prefix)
+    prepare_dataset(src_dir=args.src_dir, manifest_path=args.manifest_prefix)

    print("manifest prepare done!")


--- a/examples/timit/s1/README.md
+++ b/examples/timit/s1/README.md
 # TIMIT

-Results will be organized and updated soon.
\ No newline at end of file
+Results will be organized and updated soon.
--- a/setup.sh
+++ b/setup.sh
@@ -44,18 +44,17 @@ if [ $? != 0 ]; then
 fi

 #install auto-log
-python3 -c "import auto_log"
+python -c "import auto_log"
 if [ $? != 0 ]; then
    info_msg "Install auto_log into default system path"
-    git clone https://github.com/LDOUBLEV/AutoLog
+    test -d AutoLog || git clone https://github.com/LDOUBLEV/AutoLog
    if [ $? != 0 ]; then
        error_msg "Download auto_log failed !!!"
        exit 1
    fi
    cd AutoLog
-    pip3 install -r requirements.txt
-    python3 setup.py bdist_wheel
-    pip3 install ./dist/[Aa]uto*.whl
+    pip install -r requirements.txt
+    python setup.py install 
    cd ..
    rm -rf AutoLog
 fi