“136c9f32074c5571a37423cfdf05c875425a066a”上不存在“mobile/src/operators/kernel/conv_relu_kernel.h”
提交 3cbfd7bf 编写于 作者: J Jerryuhoo

Add speaker embedding (spk_emb) and speaker ID (spk_id) support to StyleFastSpeech2Inference

上级 db121226
......@@ -907,7 +907,9 @@ class StyleFastSpeech2Inference(FastSpeech2Inference):
energy: Union[paddle.Tensor, np.ndarray]=None,
energy_scale: Union[int, float]=None,
energy_bias: Union[int, float]=None,
robot: bool=False):
robot: bool=False,
spk_emb=None,
spk_id=None):
"""
Parameters
----------
......@@ -938,8 +940,9 @@ class StyleFastSpeech2Inference(FastSpeech2Inference):
Tensor
Output sequence of features (L, odim).
"""
spk_id = paddle.to_tensor(spk_id)
normalized_mel, d_outs, p_outs, e_outs = self.acoustic_model.inference(
text, durations=None, pitch=None, energy=None)
text, durations=None, pitch=None, energy=None, spk_emb=spk_emb, spk_id=spk_id)
# priority: groundtruth > scale/bias > previous output
# set durations
if isinstance(durations, np.ndarray):
......@@ -991,7 +994,10 @@ class StyleFastSpeech2Inference(FastSpeech2Inference):
durations=durations,
pitch=pitch,
energy=energy,
use_teacher_forcing=True)
use_teacher_forcing=True,
spk_emb=spk_emb,
spk_id=spk_id
)
logmel = self.normalizer.inverse(normalized_mel)
return logmel
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册