提交 dfa3a5c5 编写于 作者: C Corentin Jemine

Backing up decent results

上级 b93c52d3
......@@ -16,3 +16,4 @@ tacotron2
data
SV2TTS/datasets/temp
SV2TTS/_old/
SV2TTS/saved_models/*_bak_*.pt
\ No newline at end of file
......@@ -38,7 +38,7 @@ class SpeakerVerificationDataset(Dataset):
params = OrderedDict([
("Total speakers", len(self.speakers)),
("Average utterances per speaker", self.mean_n_utterances),
("Datasets", ','.join(self.datasets)),
("Datasets", ', '.join(self.datasets)),
])
return params
......
......@@ -97,20 +97,20 @@ class SpeakerEncoder(nn.Module):
sim_matrix = sim_matrix * self.similarity_weight + self.similarity_bias
# Loss
ground_truth = torch.from_numpy(
np.repeat(np.arange(speakers_per_batch), utterances_per_speaker)
).long()
loss = self.loss_fn(sim_matrix, ground_truth)
ground_truth = np.repeat(np.arange(speakers_per_batch), utterances_per_speaker)
loss = self.loss_fn(sim_matrix, torch.from_numpy(ground_truth).long())
# EER (not backpropagated)
sim_matrix = sim_matrix.detach().numpy()
with torch.no_grad():
## Imbalanced EER
inv_argmax = lambda i: np.eye(1, speakers_per_batch, i, dtype=np.int)[0]
labels = np.array([inv_argmax(i) for i in ground_truth])
preds = sim_matrix.detach().numpy()
preds = sim_matrix
# Snippet from https://yangcha.github.io/EER-ROC/
fpr, tpr, thresholds = roc_curve(labels.flatten(), preds.flatten())
eer = brentq(lambda x: 1. - x - interp1d(fpr, tpr)(x), 0., 1.)
# thresh = interp1d(fpr, thresholds)(eer)
return loss, eer
\ No newline at end of file
......@@ -6,7 +6,7 @@ model_num_layers = 3
## Training parameters
learning_rate_init = 2e-5
learning_rate_init = 1e-4 #2e-5
# exponential_decay_beta = 0.9998
speakers_per_batch = 64
speakers_per_batch = 32
utterances_per_speaker = 10
\ No newline at end of file
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册