Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Parakeet
提交
8c22397b
P
Parakeet
项目概览
PaddlePaddle
/
Parakeet
通知
11
Star
3
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
19
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Parakeet
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
19
Issue
19
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
8c22397b
编写于
12月 16, 2019
作者:
K
Kexin Zhao
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add working synthesis code
上级
f6f0a2ca
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
106 addition
and
24 deletion
+106
-24
parakeet/models/waveflow/data.py
parakeet/models/waveflow/data.py
+2
-6
parakeet/models/waveflow/synthesis.py
parakeet/models/waveflow/synthesis.py
+4
-4
parakeet/models/waveflow/waveflow.py
parakeet/models/waveflow/waveflow.py
+33
-11
parakeet/models/waveflow/waveflow_modules.py
parakeet/models/waveflow/waveflow_modules.py
+67
-3
未找到文件。
parakeet/models/waveflow/data.py
浏览文件 @
8c22397b
...
...
@@ -79,17 +79,13 @@ class Subset(dataset.Dataset):
mode
=
'constant'
,
constant_values
=
0
)
# Normalize audio.
audio
=
audio
/
MAX_WAV_VALUE
audio
=
audio
.
astype
(
np
.
float32
)
/
MAX_WAV_VALUE
mel
=
self
.
get_mel
(
audio
)
#print("mel = {}, dtype {}, shape {}".format(mel, mel.dtype, mel.shape))
return
audio
,
mel
def
_batch_examples
(
self
,
batch
):
audio_batch
=
[]
mel_batch
=
[]
for
audio
,
mel
in
batch
:
audio_batch
audios
=
[
sample
[
0
]
for
sample
in
batch
]
mels
=
[
sample
[
1
]
for
sample
in
batch
]
...
...
parakeet/models/waveflow/synthesis.py
浏览文件 @
8c22397b
...
...
@@ -8,11 +8,11 @@ import paddle.fluid.dygraph as dg
from
paddle
import
fluid
import
utils
from
wave
net
import
WaveNet
from
wave
flow
import
WaveFlow
def
add_options_to_parser
(
parser
):
parser
.
add_argument
(
'--model'
,
type
=
str
,
default
=
'wave
net
'
,
parser
.
add_argument
(
'--model'
,
type
=
str
,
default
=
'wave
flow
'
,
help
=
"general name of the model"
)
parser
.
add_argument
(
'--name'
,
type
=
str
,
help
=
"specific name of the training model"
)
...
...
@@ -30,7 +30,7 @@ def add_options_to_parser(parser):
parser
.
add_argument
(
'--output'
,
type
=
str
,
default
=
"./syn_audios"
,
help
=
"path to write synthesized audio files"
)
parser
.
add_argument
(
'--sample'
,
type
=
int
,
parser
.
add_argument
(
'--sample'
,
type
=
int
,
default
=
None
,
help
=
"which of the valid samples to synthesize audio"
)
...
...
@@ -54,7 +54,7 @@ def synthesize(config):
print
(
"Random Seed: "
,
seed
)
# Build model.
model
=
Wave
Net
(
config
,
checkpoint_dir
)
model
=
Wave
Flow
(
config
,
checkpoint_dir
)
model
.
build
(
training
=
False
)
# Obtain the current iteration.
...
...
parakeet/models/waveflow/waveflow.py
浏览文件 @
8c22397b
...
...
@@ -2,7 +2,8 @@ import itertools
import
os
import
time
import
librosa
#import librosa
from
scipy.io.wavfile
import
write
import
numpy
as
np
import
paddle.fluid.dygraph
as
dg
from
paddle
import
fluid
...
...
@@ -156,17 +157,38 @@ class WaveFlow():
output
=
"{}/{}/iter-{}"
.
format
(
config
.
output
,
config
.
name
,
iteration
)
os
.
makedirs
(
output
,
exist_ok
=
True
)
filename
=
"{}/valid_{}.wav"
.
format
(
output
,
sample
)
print
(
"Synthesize sample {}, save as {}"
.
format
(
sample
,
filename
))
mels_list
=
[
mels
for
_
,
mels
in
self
.
validloader
()]
if
sample
is
not
None
:
mels_list
=
[
mels_list
[
sample
]]
mels_list
=
[
mels
for
_
,
mels
,
_
in
self
.
validloader
()]
start_time
=
time
.
time
()
syn_audio
=
self
.
waveflow
.
synthesize
(
mels_list
[
sample
])
syn_time
=
time
.
time
()
-
start_time
print
(
"audio shape {}, synthesis time {}"
.
format
(
syn_audio
.
shape
,
syn_time
))
librosa
.
output
.
write_wav
(
filename
,
syn_audio
,
sr
=
config
.
sample_rate
)
audio_times
=
[]
inf_times
=
[]
for
sample
,
mel
in
enumerate
(
mels_list
):
filename
=
"{}/valid_{}.wav"
.
format
(
output
,
sample
)
print
(
"Synthesize sample {}, save as {}"
.
format
(
sample
,
filename
))
start_time
=
time
.
time
()
audio
=
self
.
waveflow
.
synthesize
(
mel
)
syn_time
=
time
.
time
()
-
start_time
audio_time
=
audio
.
shape
[
0
]
/
22050
print
(
"audio time {}, synthesis time {}, speedup: {}"
.
format
(
audio_time
,
syn_time
,
audio_time
/
syn_time
))
#librosa.output.write_wav(filename, syn_audio,
# sr=config.sample_rate)
audio
=
audio
.
numpy
()
*
32768.0
audio
=
audio
.
astype
(
'int16'
)
write
(
filename
,
config
.
sample_rate
,
audio
)
audio_times
.
append
(
audio_time
)
inf_times
.
append
(
syn_time
)
total_audio
=
sum
(
audio_times
)
total_inf
=
sum
(
inf_times
)
print
(
"Total audio: {}, total inf time {}, speedup: {}"
.
format
(
total_audio
,
total_inf
,
total_audio
/
total_inf
))
def
save
(
self
,
iteration
):
utils
.
save_latest_parameters
(
self
.
checkpoint_dir
,
iteration
,
...
...
parakeet/models/waveflow/waveflow_modules.py
浏览文件 @
8c22397b
...
...
@@ -75,6 +75,16 @@ class Conditioner(dg.Layer):
return
fluid
.
layers
.
squeeze
(
x
,
[
1
])
def
infer
(
self
,
x
):
x
=
fluid
.
layers
.
unsqueeze
(
x
,
1
)
for
layer
in
self
.
upsample_conv2d
:
x
=
layer
(
x
)
# Trim conv artifacts.
time_cutoff
=
layer
.
_filter_size
[
1
]
-
layer
.
_stride
[
1
]
x
=
fluid
.
layers
.
leaky_relu
(
x
[:,
:,
:,
:
-
time_cutoff
],
alpha
=
0.4
)
return
fluid
.
layers
.
squeeze
(
x
,
[
1
])
class
Flow
(
dg
.
Layer
):
def
__init__
(
self
,
name_scope
,
config
):
...
...
@@ -183,6 +193,14 @@ class Flow(dg.Layer):
return
self
.
end
(
output
)
def
debug
(
x
,
msg
):
y
=
x
.
numpy
()
print
(
msg
+
" :
\n
"
,
y
)
print
(
"shape: "
,
y
.
shape
)
print
(
"dtype: "
,
y
.
dtype
)
print
(
""
)
class
WaveFlowModule
(
dg
.
Layer
):
def
__init__
(
self
,
name_scope
,
config
):
super
(
WaveFlowModule
,
self
).
__init__
(
name_scope
)
...
...
@@ -217,7 +235,7 @@ class WaveFlowModule(dg.Layer):
if
mel
.
shape
[
2
]
>
pruned_len
:
mel
=
mel
[:,
:,
:
pruned_len
]
# From [bs, mel_bands, time] to [bs, mel_bands, n_group, time/n_group]
# From [bs, mel_bands, time] to [bs, mel_bands, n_group, time/n_group]
mel
=
fluid
.
layers
.
transpose
(
unfold
(
mel
,
self
.
n_group
),
[
0
,
1
,
3
,
2
])
# From [bs, time] to [bs, n_group, time/n_group]
audio
=
fluid
.
layers
.
transpose
(
unfold
(
audio
,
self
.
n_group
),
[
0
,
2
,
1
])
...
...
@@ -247,8 +265,54 @@ class WaveFlowModule(dg.Layer):
return
z
,
log_s_list
def
synthesize
(
self
,
mels
):
pass
def
synthesize
(
self
,
mel
,
sigma
=
1.0
):
#debug(mel, "mel")
mel
=
self
.
conditioner
.
infer
(
mel
)
#debug(mel, "mel after conditioner")
# From [bs, mel_bands, time] to [bs, mel_bands, n_group, time/n_group]
mel
=
fluid
.
layers
.
transpose
(
unfold
(
mel
,
self
.
n_group
),
[
0
,
1
,
3
,
2
])
#debug(mel, "after group")
audio
=
fluid
.
layers
.
gaussian_random
(
shape
=
[
mel
.
shape
[
0
],
1
,
mel
.
shape
[
2
],
mel
.
shape
[
3
]],
std
=
sigma
)
#debug(audio, "audio")
for
i
in
reversed
(
range
(
self
.
n_flows
)):
# Permute over the height dimension.
audio_slices
=
[
audio
[:,
:,
j
,
:]
for
j
in
self
.
perms
[
i
]]
audio
=
fluid
.
layers
.
stack
(
audio_slices
,
axis
=
2
)
mel_slices
=
[
mel
[:,
:,
j
,
:]
for
j
in
self
.
perms
[
i
]]
mel
=
fluid
.
layers
.
stack
(
mel_slices
,
axis
=
2
)
audio_list
=
[]
audio_0
=
audio
[:,
:,
:
1
,
:]
audio_list
.
append
(
audio_0
)
for
h
in
range
(
1
,
self
.
n_group
):
# inputs: [bs, 1, h, time/n_group]
inputs
=
fluid
.
layers
.
concat
(
audio_list
,
axis
=
2
)
conds
=
mel
[:,
:,
1
:(
h
+
1
),
:]
outputs
=
self
.
flows
[
i
](
inputs
,
conds
)
log_s
=
outputs
[:,
:
1
,
(
h
-
1
):
h
,
:]
b
=
outputs
[:,
1
:,
(
h
-
1
):
h
,
:]
audio_h
=
(
audio
[:,
:,
h
:(
h
+
1
),
:]
-
b
)
/
fluid
.
layers
.
exp
(
log_s
)
audio_list
.
append
(
audio_h
)
audio
=
fluid
.
layers
.
concat
(
audio_list
,
axis
=
2
)
#print("audio.shape =", audio.shape)
# Assume batch size = 1
# audio: [n_group, time/n_group]
audio
=
fluid
.
layers
.
squeeze
(
audio
,
[
0
,
1
])
# audio: [time]
audio
=
fluid
.
layers
.
reshape
(
fluid
.
layers
.
transpose
(
audio
,
[
1
,
0
]),
[
-
1
])
#print("audio.shape =", audio.shape)
return
audio
def
start_new_sequence
(
self
):
for
layer
in
self
.
sublayers
():
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录