diff --git a/.gitignore b/.gitignore index 13dd63d516ad2b787db647c273ad9b9f4abbca1c..909b4a75685e936496ee2edea82ec448d27b9994 100644 --- a/.gitignore +++ b/.gitignore @@ -132,7 +132,7 @@ dmypy.json .pyre/ # Shell, vim, and output folder -*.sh +#*.sh *.swp runs syn_audios diff --git a/examples/fastspeech/train.py b/examples/fastspeech/train.py index 48c26eb8b0cdb49ba47248ca4df525bd2ae7f6e4..52b5725a1e243992ec966b128ec5d9626de8f9b9 100644 --- a/examples/fastspeech/train.py +++ b/examples/fastspeech/train.py @@ -13,10 +13,12 @@ from tensorboardX import SummaryWriter import paddle.fluid.dygraph as dg import paddle.fluid.layers as layers import paddle.fluid as fluid -from parakeet.models.dataloader.ljspeech import LJSpeechLoader -from parakeet.models.transformer_tts.transformerTTS import TransformerTTS +from parakeet.models.transformer_tts.transformer_tts import TransformerTTS from parakeet.models.fastspeech.fastspeech import FastSpeech from parakeet.models.fastspeech.utils import get_alignment +import sys +sys.path.append("../transformer_tts") +from data import LJSpeechLoader def load_checkpoint(step, model_path): model_dict, opti_dict = fluid.dygraph.load_dygraph(os.path.join(model_path, step)) diff --git a/examples/fastspeech/train.sh b/examples/fastspeech/train.sh index 31a2fdb87556dd2970d787a1d5e092bef616b137..d9cf24eacc4e6b08d829da9209bdb51ce6b7d376 100644 --- a/examples/fastspeech/train.sh +++ b/examples/fastspeech/train.sh @@ -1,4 +1,3 @@ - # train model # if you wish to resume from an exists model, uncomment --checkpoint_path and --fastspeech_step #CUDA_VISIBLE_DEVICES=0,1,2,3 \ diff --git a/parakeet/models/dataloader/ljspeech.py b/examples/transformer_tts/data.py similarity index 100% rename from parakeet/models/dataloader/ljspeech.py rename to examples/transformer_tts/data.py diff --git a/examples/transformer_tts/synthesis.py b/examples/transformer_tts/synthesis.py index d0c155cfa3183efebb24ab700e0d264b7f9cece9..dc80dc7b381dcb1ac539fd8426402d45dd9800ab 100644 --- a/examples/transformer_tts/synthesis.py +++ b/examples/transformer_tts/synthesis.py @@ -14,7 +14,7 @@ from pprint import pprint from collections import OrderedDict from parakeet import audio from parakeet.models.transformer_tts.vocoder import Vocoder -from parakeet.models.transformer_tts.transformerTTS import TransformerTTS +from parakeet.models.transformer_tts.transformer_tts import TransformerTTS def load_checkpoint(step, model_path): model_dict, _ = fluid.dygraph.load_dygraph(os.path.join(model_path, step)) diff --git a/examples/transformer_tts/train_transformer.py b/examples/transformer_tts/train_transformer.py index bcfa16f0094a5a6b508987edf9d04816312a96bf..d25820988db09301c3e6dabe6e1fb80a5b2b9b86 100644 --- a/examples/transformer_tts/train_transformer.py +++ b/examples/transformer_tts/train_transformer.py @@ -13,8 +13,8 @@ import paddle.fluid as fluid import paddle.fluid.dygraph as dg import paddle.fluid.layers as layers from parakeet.modules.utils import cross_entropy -from parakeet.models.dataloader.ljspeech import LJSpeechLoader -from parakeet.models.transformer_tts.transformerTTS import TransformerTTS +from data import LJSpeechLoader +from parakeet.models.transformer_tts.transformer_tts import TransformerTTS def load_checkpoint(step, model_path): model_dict, opti_dict = fluid.dygraph.load_dygraph(os.path.join(model_path, step)) diff --git a/examples/transformer_tts/train_vocoder.py b/examples/transformer_tts/train_vocoder.py index b2db5fc26266b05e7da9052de4ef4dfcc34cb2ca..cc32ca90c2095316b7d836ed55db40026f64d49b 100644 --- a/examples/transformer_tts/train_vocoder.py +++ b/examples/transformer_tts/train_vocoder.py @@ -10,7 +10,7 @@ from pprint import pprint import paddle.fluid as fluid import paddle.fluid.dygraph as dg import paddle.fluid.layers as layers -from parakeet.models.dataloader.ljspeech import LJSpeechLoader +from data import LJSpeechLoader from parakeet.models.transformer_tts.vocoder import Vocoder def load_checkpoint(step, model_path): diff --git a/parakeet/models/dataloader/__init__.py b/parakeet/models/dataloader/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/parakeet/models/fastspeech/decoder.py b/parakeet/models/fastspeech/decoder.py index aa9910350e19a7b179eccabfb69a5253447ecd33..363268f65c764c62e420864094a9498b088800ac 100644 --- a/parakeet/models/fastspeech/decoder.py +++ b/parakeet/models/fastspeech/decoder.py @@ -1,7 +1,7 @@ import paddle.fluid.dygraph as dg import paddle.fluid as fluid from parakeet.modules.utils import * -from parakeet.models.fastspeech.FFTBlock import FFTBlock +from parakeet.models.fastspeech.fft_block import FFTBlock class Decoder(dg.Layer): def __init__(self, diff --git a/parakeet/models/fastspeech/encoder.py b/parakeet/models/fastspeech/encoder.py index e82e0184ca2270c561baab17d17b8bad8f9d6a01..028cf782c878c11480d4b138bfdb69c5f45a218a 100644 --- a/parakeet/models/fastspeech/encoder.py +++ b/parakeet/models/fastspeech/encoder.py @@ -1,7 +1,7 @@ import paddle.fluid.dygraph as dg import paddle.fluid as fluid from parakeet.modules.utils import * -from parakeet.models.fastspeech.FFTBlock import FFTBlock +from parakeet.models.fastspeech.fft_block import FFTBlock class Encoder(dg.Layer): def __init__(self, diff --git a/parakeet/models/fastspeech/fastspeech.py b/parakeet/models/fastspeech/fastspeech.py index ee120b9ab19f93694d0c1a3b908fc5788157bb88..4a01b955f4639502597c602e5b7e05b5facf29fc 100644 --- a/parakeet/models/fastspeech/fastspeech.py +++ b/parakeet/models/fastspeech/fastspeech.py @@ -3,7 +3,7 @@ import paddle.fluid.dygraph as dg import paddle.fluid as fluid from parakeet.g2p.text.symbols import symbols from parakeet.models.transformer_tts.post_convnet import PostConvNet -from parakeet.models.fastspeech.LengthRegulator import LengthRegulator +from parakeet.models.fastspeech.length_regulator import LengthRegulator from parakeet.models.fastspeech.encoder import Encoder from parakeet.models.fastspeech.decoder import Decoder diff --git a/parakeet/models/fastspeech/FFTBlock.py b/parakeet/models/fastspeech/fft_block.py similarity index 100% rename from parakeet/models/fastspeech/FFTBlock.py rename to parakeet/models/fastspeech/fft_block.py diff --git a/parakeet/models/fastspeech/LengthRegulator.py b/parakeet/models/fastspeech/length_regulator.py similarity index 100% rename from parakeet/models/fastspeech/LengthRegulator.py rename to parakeet/models/fastspeech/length_regulator.py diff --git a/parakeet/models/transformer_tts/CBHG.py b/parakeet/models/transformer_tts/cbhg.py similarity index 100% rename from parakeet/models/transformer_tts/CBHG.py rename to parakeet/models/transformer_tts/cbhg.py diff --git a/parakeet/models/transformer_tts/transformerTTS.py b/parakeet/models/transformer_tts/transformer_tts.py similarity index 71% rename from parakeet/models/transformer_tts/transformerTTS.py rename to parakeet/models/transformer_tts/transformer_tts.py index b2753265f2fea552e63f8a6056c8f9dce21c7520..bf2924a7092ca82ca2bfecc322bb84cf74f6c3f5 100644 --- a/parakeet/models/transformer_tts/transformerTTS.py +++ b/parakeet/models/transformer_tts/transformer_tts.py @@ -11,16 +11,9 @@ class TransformerTTS(dg.Layer): self.config = config def forward(self, characters, mel_input, pos_text, pos_mel): - # key (batch_size, seq_len, channel) - # c_mask (batch_size, seq_len) - # attns_enc (channel / 2, seq_len, seq_len) - + key, c_mask, attns_enc = self.encoder(characters, pos_text) - # mel_output/postnet_output (batch_size, mel_len, n_mel) - # attn_probs (128, mel_len, seq_len) - # stop_preds (batch_size, mel_len, 1) - # attns_dec (128, mel_len, mel_len) mel_output, postnet_output, attn_probs, stop_preds, attns_dec = self.decoder(key, key, mel_input, c_mask, pos_mel) return mel_output, postnet_output, attn_probs, stop_preds, attns_enc, attns_dec diff --git a/parakeet/models/transformer_tts/vocoder.py b/parakeet/models/transformer_tts/vocoder.py index 690d4ce023b1c4be8774198dfcde68650502343a..0a3f14aba959146eda59513c6cf5793a3373b074 100644 --- a/parakeet/models/transformer_tts/vocoder.py +++ b/parakeet/models/transformer_tts/vocoder.py @@ -2,7 +2,7 @@ import paddle.fluid.dygraph as dg import paddle.fluid as fluid from parakeet.modules.customized import Conv1D from parakeet.modules.utils import * -from parakeet.models.transformer_tts.CBHG import CBHG +from parakeet.models.transformer_tts.cbhg import CBHG class Vocoder(dg.Layer): """