提交 167aaa65 编写于 作者: J Jerryuhoo

Normalize wav to a peak absolute value of 1.0 in preprocess when its peak exceeds 1.0 (previously such utterances were skipped), test=tts

上级 1eec7b5e
...@@ -55,8 +55,11 @@ def process_sentence(config: Dict[str, Any], ...@@ -55,8 +55,11 @@ def process_sentence(config: Dict[str, Any],
if utt_id in sentences: if utt_id in sentences:
# reading, resampling may occur # reading, resampling may occur
wav, _ = librosa.load(str(fp), sr=config.fs) wav, _ = librosa.load(str(fp), sr=config.fs)
if len(wav.shape) != 1 or np.abs(wav).max() > 1.0: if len(wav.shape) != 1:
return record return record
max_value = np.abs(wav).max()
if max_value > 1.0:
wav = wav / max_value
assert len(wav.shape) == 1, f"{utt_id} is not a mono-channel audio." assert len(wav.shape) == 1, f"{utt_id} is not a mono-channel audio."
assert np.abs(wav).max( assert np.abs(wav).max(
) <= 1.0, f"{utt_id} is seems to be different that 16 bit PCM." ) <= 1.0, f"{utt_id} is seems to be different that 16 bit PCM."
......
...@@ -47,8 +47,11 @@ def process_sentence(config: Dict[str, Any], ...@@ -47,8 +47,11 @@ def process_sentence(config: Dict[str, Any],
if utt_id in sentences: if utt_id in sentences:
# reading, resampling may occur # reading, resampling may occur
y, _ = librosa.load(str(fp), sr=config.fs) y, _ = librosa.load(str(fp), sr=config.fs)
if len(y.shape) != 1 or np.abs(y).max() > 1.0: if len(y.shape) != 1:
return record return record
max_value = np.abs(y).max()
if max_value > 1.0:
y = y / max_value
assert len(y.shape) == 1, f"{utt_id} is not a mono-channel audio." assert len(y.shape) == 1, f"{utt_id} is not a mono-channel audio."
assert np.abs(y).max( assert np.abs(y).max(
) <= 1.0, f"{utt_id} is seems to be different that 16 bit PCM." ) <= 1.0, f"{utt_id} is seems to be different that 16 bit PCM."
......
...@@ -47,8 +47,11 @@ def process_sentence(config: Dict[str, Any], ...@@ -47,8 +47,11 @@ def process_sentence(config: Dict[str, Any],
if utt_id in sentences: if utt_id in sentences:
# reading, resampling may occur # reading, resampling may occur
wav, _ = librosa.load(str(fp), sr=config.fs) wav, _ = librosa.load(str(fp), sr=config.fs)
if len(wav.shape) != 1 or np.abs(wav).max() > 1.0: if len(wav.shape) != 1:
return record return record
max_value = np.abs(wav).max()
if max_value > 1.0:
wav = wav / max_value
assert len(wav.shape) == 1, f"{utt_id} is not a mono-channel audio." assert len(wav.shape) == 1, f"{utt_id} is not a mono-channel audio."
assert np.abs(wav).max( assert np.abs(wav).max(
) <= 1.0, f"{utt_id} is seems to be different that 16 bit PCM." ) <= 1.0, f"{utt_id} is seems to be different that 16 bit PCM."
......
...@@ -51,8 +51,11 @@ def process_sentence(config: Dict[str, Any], ...@@ -51,8 +51,11 @@ def process_sentence(config: Dict[str, Any],
if utt_id in sentences: if utt_id in sentences:
# reading, resampling may occur # reading, resampling may occur
wav, _ = librosa.load(str(fp), sr=config.fs) wav, _ = librosa.load(str(fp), sr=config.fs)
if len(wav.shape) != 1 or np.abs(wav).max() > 1.0: if len(wav.shape) != 1:
return record return record
max_value = np.abs(wav).max()
if max_value > 1.0:
wav = wav / max_value
assert len(wav.shape) == 1, f"{utt_id} is not a mono-channel audio." assert len(wav.shape) == 1, f"{utt_id} is not a mono-channel audio."
assert np.abs(wav).max( assert np.abs(wav).max(
) <= 1.0, f"{utt_id} is seems to be different that 16 bit PCM." ) <= 1.0, f"{utt_id} is seems to be different that 16 bit PCM."
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册