diff --git a/paddlespeech/t2s/datasets/am_batch_fn.py b/paddlespeech/t2s/datasets/am_batch_fn.py index 05471167f05e7249ee310b55dcda788c38688119..2cb7a11a22b02eb25da3c38d6dfdc56d64eb4c25 100644 --- a/paddlespeech/t2s/datasets/am_batch_fn.py +++ b/paddlespeech/t2s/datasets/am_batch_fn.py @@ -68,7 +68,7 @@ def erniesat_batch_fn(examples, mean_phn_span: int=8, seg_emb: bool=False, text_masking: bool=False): - # fields = ["text", "text_lengths", "speech", "speech_lengths", "durations", "pitch", "energy"] + # fields = ["text", "text_lengths", "speech", "speech_lengths", "align_start", "align_end"] text = [np.array(item["text"], dtype=np.int64) for item in examples] speech = [np.array(item["speech"], dtype=np.float32) for item in examples] diff --git a/paddlespeech/t2s/exps/ernie_sat/train.py b/paddlespeech/t2s/exps/ernie_sat/train.py index 5d8eadb684ea616bca9ed47d951c6c2a5bc2ee15..ccd1245e1da504866f5aa59c3e4ba4cc591eb2b2 100644 --- a/paddlespeech/t2s/exps/ernie_sat/train.py +++ b/paddlespeech/t2s/exps/ernie_sat/train.py @@ -116,13 +116,6 @@ def train_sp(args, config): odim = config.n_mels model = ErnieSAT(idim=vocab_size, odim=odim, **config["model"]) - # model_path = "/home/yuantian01/PaddleSpeech_ERNIE_SAT/PaddleSpeech/examples/ernie_sat/pretrained_model/paddle_checkpoint_en/model.pdparams" - # state_dict = paddle.load(model_path) - # new_state_dict = {} - # for key, value in state_dict.items(): - # new_key = "model." + key - # new_state_dict[new_key] = value - # model.set_state_dict(new_state_dict) if world_size > 1: model = DataParallel(model)