“ae6dae4189edd0c6c087c5b44c5fd02a16c7af77”上不存在“mobile/src/operators/kernel/arm/pool_kernel.cpp”
未验证 提交 caa391f4 编写于 作者: 小湉湉 提交者: GitHub

fix speedyspeech inference, test=tts (#1322)

上级 0c4895cd
......@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle
from paddle import nn
......@@ -23,18 +22,16 @@ def expand(encodings: paddle.Tensor, durations: paddle.Tensor) -> paddle.Tensor:
encodings: (B, T, C)
durations: (B, T)
"""
batch_size, t_enc = durations.shape
durations = durations.numpy()
slens = np.sum(durations, -1)
t_dec = np.max(slens)
M = np.zeros([batch_size, t_dec, t_enc])
batch_size, t_enc = paddle.shape(durations)
slens = paddle.sum(durations, -1)
t_dec = paddle.max(slens)
M = paddle.zeros([batch_size, t_dec, t_enc])
for i in range(batch_size):
k = 0
for j in range(t_enc):
d = durations[i, j]
M[i, k:k + d, j] = 1
k += d
M = paddle.to_tensor(M, dtype=encodings.dtype)
encodings = paddle.matmul(M, encodings)
return encodings
......@@ -234,28 +231,14 @@ class SpeedySpeech(nn.Layer):
encodings = self.encoder(text, tones, spk_id)
if type(durations) == type(None):
pred_durations = self.duration_predictor(encodings) # (1, T)
if durations is None:
# (1, T)
pred_durations = self.duration_predictor(encodings)
durations_to_expand = paddle.round(pred_durations.exp())
durations_to_expand = (durations_to_expand).astype(paddle.int64)
slens = paddle.sum(durations_to_expand, -1) # [1]
t_dec = slens[0] # [1]
t_enc = paddle.shape(pred_durations)[-1]
M = paddle.zeros([1, t_dec, t_enc])
k = paddle.full([1], 0, dtype=paddle.int64)
for j in range(t_enc):
d = durations_to_expand[0, j]
# If the d == 0, slice action is meaningless and not supported
if d >= 1:
M[0, k:k + d, j] = 1
k += d
encodings = paddle.matmul(M, encodings)
durations_to_expand = durations_to_expand.astype(paddle.int64)
else:
durations_to_expand = durations
encodings = expand(encodings, durations_to_expand)
encodings = expand(encodings, durations_to_expand)
shape = paddle.shape(encodings)
t_dec, feature_size = shape[1], shape[2]
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册