Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
mrywhh
Real-Time-Voice-Cloning
提交
60e96934
R
Real-Time-Voice-Cloning
项目概览
mrywhh
/
Real-Time-Voice-Cloning
落后 Fork 源项目 12 个版本
从无法访问的项目Fork
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
R
Real-Time-Voice-Cloning
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
60e96934
编写于
4月 08, 2019
作者:
C
Corentin Jemine
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fixed a bug with the encoding in the vocoder. Trying a bigger vocoder network
上级
4da25d2b
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
29 addition
and
25 deletion
+29
-25
wave-rnn/checkpoints/mu_law_big.pt
wave-rnn/checkpoints/mu_law_big.pt
+0
-0
wave-rnn/inference_demo.py
wave-rnn/inference_demo.py
+3
-1
wave-rnn/train.py
wave-rnn/train.py
+3
-3
wave-rnn/vocoder/params.py
wave-rnn/vocoder/params.py
+21
-21
wave-rnn/vocoder/vocoder_dataset.py
wave-rnn/vocoder/vocoder_dataset.py
+2
-0
未找到文件。
wave-rnn/checkpoints/mu_law_big.pt
0 → 100644
浏览文件 @
60e96934
文件已添加
wave-rnn/inference_demo.py
浏览文件 @
60e96934
...
...
@@ -3,7 +3,7 @@ from vocoder.vocoder_dataset import VocoderDataset
from
vocoder
import
inference
from
vocoder
import
audio
import
numpy
as
np
from
vocoder.params
import
print_params
,
model_name
from
vocoder.params
import
print_params
,
model_name
,
use_mu_law
print_params
()
...
...
@@ -28,6 +28,8 @@ for i in sorted(np.random.choice(len(dataset), n_samples)):
out_pred_fpath
=
fileio
.
join
(
gen_path
,
"%s_%d_pred.wav"
%
(
model_name
,
i
))
wav_gt
=
audio
.
restore_signal
(
wav_gt
)
if
use_mu_law
:
wav_gt
=
audio
.
expand_signal
(
wav_gt
)
wav_pred
=
inference
.
infer_waveform
(
mel
,
normalize
=
False
)
# The dataloader already normalizes
audio
.
save_wav
(
out_pred_fpath
,
wav_pred
)
...
...
wave-rnn/train.py
浏览文件 @
60e96934
...
...
@@ -34,8 +34,8 @@ model_dir = 'checkpoints'
fileio
.
ensure_dir
(
model_dir
)
model_fpath
=
fileio
.
join
(
model_dir
,
model_name
+
'.pt'
)
#
data_path = "../data/Synthesizer"
data_path
=
"E:/Datasets/Synthesizer"
data_path
=
"../data/Synthesizer"
#
data_path = "E:/Datasets/Synthesizer"
gen_path
=
'model_outputs'
fileio
.
ensure_dir
(
gen_path
)
...
...
@@ -125,6 +125,6 @@ if __name__ == '__main__':
print
(
'<saved>'
)
optimizer
=
optim
.
Adam
(
model
.
parameters
())
train
(
model
,
optimizer
,
epochs
=
60
,
batch_size
=
64
,
classes
=
2
**
bits
,
train
(
model
,
optimizer
,
epochs
=
100
,
batch_size
=
100
,
classes
=
2
**
bits
,
seq_len
=
seq_len
,
step
=
step
,
lr
=
1e-4
)
\ No newline at end of file
wave-rnn/vocoder/params.py
浏览文件 @
60e96934
...
...
@@ -17,38 +17,38 @@ use_mu_law = True
# Minimum number of mel frames below which samples are discarded for training
min_n_frames
=
10
## Model parameters
model_name
=
'mu_law'
# Number of bits for the encoding. Higher means higher quality output but longer training time
# and training memory required.
bits
=
9
pad
=
2
seq_len
=
hop_length
*
5
mel_win
=
seq_len
//
hop_length
+
2
*
pad
rnn_dims
=
512
fc_dims
=
512
upsample_factors
=
(
5
,
5
,
8
)
feat_dims
=
80
compute_dims
=
128
res_out_dims
=
128
res_blocks
=
10
# ## Model parameters
# model_name = 'mu_law
_big
'
# model_name = 'mu_law'
# # Number of bits for the encoding. Higher means higher quality output but longer training time
# # and training memory required.
# bits = 9
# pad = 2
# seq_len = hop_length * 5
# mel_win = seq_len // hop_length + 2 * pad
# rnn_dims =
768
# fc_dims =
768
# rnn_dims =
512
# fc_dims =
512
# upsample_factors = (5, 5, 8)
# feat_dims = 80
# compute_dims = 1
96
# res_out_dims = 1
96
# compute_dims = 1
28
# res_out_dims = 1
28
# res_blocks = 10
## Model parameters
model_name
=
'mu_law_big'
# Number of bits for the encoding. Higher means higher quality output but longer training time
# and training memory required.
bits
=
9
pad
=
2
seq_len
=
hop_length
*
5
mel_win
=
seq_len
//
hop_length
+
2
*
pad
rnn_dims
=
768
fc_dims
=
768
upsample_factors
=
(
5
,
5
,
8
)
feat_dims
=
80
compute_dims
=
196
res_out_dims
=
196
res_blocks
=
10
def
print_params
():
for
param_name
in
sorted
(
globals
()):
...
...
wave-rnn/vocoder/vocoder_dataset.py
浏览文件 @
60e96934
...
...
@@ -22,6 +22,8 @@ class VocoderDataset(Dataset):
# Load the wav and quantize it
wav
=
np
.
load
(
wav_path
)
if
use_mu_law
:
wav
=
audio
.
compand_signal
(
wav
)
quant
=
audio
.
quantize_signal
(
wav
)
# Load the mel spectrogram and adjust its range to [0, 1]
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录