Fix synthesis for TransformerTTS and FastSpeech: cast inputs to int64 explicitly

9dad6c3d · chenfeiyu · b7c584e2 · 9dad6c3d · 9dad6c3d
Hide whitespace changes
Inline / Side-by-side

Showing 2 changed files with 7 additions and 5 deletions

examples/fastspeech/synthesis.py +2 -2

examples/transformer_tts/synthesis.py +5 -3

No files found.
--- a/examples/fastspeech/synthesis.py
+++ b/examples/fastspeech/synthesis.py
@@ -83,8 +83,8 @@ def synthesis(text_input, args):
    pos_text = np.arange(1, text.shape[1] + 1)
    pos_text = np.expand_dims(pos_text, axis=0)
-    text = dg.to_variable(text)
+    text = dg.to_variable(text).astype(np.int64)
-    pos_text = dg.to_variable(pos_text)
+    pos_text = dg.to_variable(pos_text).astype(np.int64)
    _, mel_output_postnet = model(text, pos_text, alpha=args.alpha)

--- a/examples/transformer_tts/synthesis.py
+++ b/examples/transformer_tts/synthesis.py
@@ -92,15 +92,17 @@ def synthesis(text_input, args):
        model_vocoder.eval()
    # init input
    text = np.asarray(text_to_sequence(text_input))
-    text = fluid.layers.unsqueeze(dg.to_variable(text), [0])
+    text = fluid.layers.unsqueeze(dg.to_variable(text).astype(np.int64), [0])
    mel_input = dg.to_variable(np.zeros([1, 1, 80])).astype(np.float32)
    pos_text = np.arange(1, text.shape[1] + 1)
-    pos_text = fluid.layers.unsqueeze(dg.to_variable(pos_text), [0])
+    pos_text = fluid.layers.unsqueeze(
+        dg.to_variable(pos_text).astype(np.int64), [0])
    pbar = tqdm(range(args.max_len))
    for i in pbar:
        pos_mel = np.arange(1, mel_input.shape[1] + 1)
-        pos_mel = fluid.layers.unsqueeze(dg.to_variable(pos_mel), [0])
+        pos_mel = fluid.layers.unsqueeze(
+            dg.to_variable(pos_mel).astype(np.int64), [0])
        mel_pred, postnet_pred, attn_probs, stop_preds, attn_enc, attn_dec = model(
            text, mel_input, pos_text, pos_mel)
        mel_input = fluid.layers.concat(