From 43cdb261d022a2bb947a1a99999b830ff54c0088 Mon Sep 17 00:00:00 2001
From: Xinghai Sun <sunxinghai1216@gmail.com>
Date: Sun, 15 Oct 2017 03:41:20 -0700
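Subject: [PATCH] Fix a bug in running tools/compute_meanstd.py with seqbin
 data.

Move the seqbin filename check (paths ending in ".seqbin_<N>") out of
DataGenerator and into AudioSegment.from_file, which now dispatches such
paths to AudioSegment.from_sequence_file. Code that loads audio through
from_file (including SpeechSegment.from_file) therefore handles sequence
files without special-casing them itself.

SpeechSegment.from_sequence_file and the seqbin branch in DataGenerator
become redundant and are removed, along with the then-unneeded "import re"
in data.py.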
---
 deep_speech_2/data_utils/audio.py  |  7 +++++--
 deep_speech_2/data_utils/data.py   |  4 ----
 deep_speech_2/data_utils/speech.py | 14 --------------
 3 files changed, 5 insertions(+), 20 deletions(-)

diff --git a/deep_speech_2/data_utils/audio.py b/deep_speech_2/data_utils/audio.py
index 01c06484..3fb78295 100644
--- a/deep_speech_2/data_utils/audio.py
+++ b/deep_speech_2/data_utils/audio.py
@@ -65,8 +65,11 @@ class AudioSegment(object):
         :return: Audio segment instance.
         :rtype: AudioSegment
         """
-        samples, sample_rate = soundfile.read(file, dtype='float32')
-        return cls(samples, sample_rate)
+        if isinstance(file, basestring) and re.findall(r".seqbin_\d+$", file):
+            return cls.from_sequence_file(file)
+        else:
+            samples, sample_rate = soundfile.read(file, dtype='float32')
+            return cls(samples, sample_rate)
 
     @classmethod
     def slice_from_file(cls, file, start=None, end=None):
diff --git a/deep_speech_2/data_utils/data.py b/deep_speech_2/data_utils/data.py
index fca53817..71ba2434 100644
--- a/deep_speech_2/data_utils/data.py
+++ b/deep_speech_2/data_utils/data.py
@@ -7,7 +7,6 @@ from __future__ import print_function
 
 import random
 import tarfile
-import re
 import multiprocessing
 import numpy as np
 import paddle.v2 as paddle
@@ -105,9 +104,6 @@ class DataGenerator(object):
         if filename.startswith('tar:'):
             speech_segment = SpeechSegment.from_file(
                 self._subfile_from_tar(filename), transcript)
-        elif re.findall(r".seqbin_\d+$", filename):
-            speech_segment = SpeechSegment.from_sequence_file(filename,
-                                                              transcript)
         else:
             speech_segment = SpeechSegment.from_file(filename, transcript)
         self._augmentation_pipeline.transform_audio(speech_segment)
diff --git a/deep_speech_2/data_utils/speech.py b/deep_speech_2/data_utils/speech.py
index 623b38c2..0cea8873 100644
--- a/deep_speech_2/data_utils/speech.py
+++ b/deep_speech_2/data_utils/speech.py
@@ -50,20 +50,6 @@ class SpeechSegment(AudioSegment):
         audio = AudioSegment.from_file(filepath)
         return cls(audio.samples, audio.sample_rate, transcript)
 
-    @classmethod
-    def from_sequence_file(cls, filepath, transcript):
-        """Create speech segment from sequence file and transcript.
-        
-        :param filepath: Filepath of sequence file.
-        :type filepath: basestring
-        :param transcript: Transcript text for the speech.
-        :type transript: basestring
-        :return: Speech segment instance.
-        :rtype: SpeechSegment
-        """
-        audio = AudioSegment.from_sequence_file(filepath)
-        return cls(audio.samples, audio.sample_rate, transcript)
-
     @classmethod
     def from_bytes(cls, bytes, transcript):
         """Create speech segment from a byte string and corresponding
-- 
GitLab