提交 09123cd9 编写于 作者: Y Youwei Song 提交者: hong

fix input shape for new Embedding (#4048)

test=develop
上级 d7668699
...@@ -267,11 +267,12 @@ class UnifiedTransformer(ModelBase): ...@@ -267,11 +267,12 @@ class UnifiedTransformer(ModelBase):
Create attention mask. Create attention mask.
@param : input_mask @param : input_mask
@type : Variable(shape: [batch_size, max_seq_len, 1]) @type : Variable(shape: [batch_size, max_seq_len])
@param : auto_regressive @param : auto_regressive
@type : bool @type : bool
""" """
input_mask = fluid.layers.unsqueeze(input=input_mask, axes=[2])
seq_len = input_mask.shape[1] seq_len = input_mask.shape[1]
input_mask = layers.cast(input_mask, self._dtype) input_mask = layers.cast(input_mask, self._dtype)
......
...@@ -67,10 +67,10 @@ def main(): ...@@ -67,10 +67,10 @@ def main():
place = fluid.CPUPlace() place = fluid.CPUPlace()
with fluid.dygraph.guard(place): with fluid.dygraph.guard(place):
model = Embedder("Embedder", 10, 20, 20, 20, 20) model = Embedder("Embedder", 10, 20, 20, 20, 20)
token_inp = fluid.dygraph.to_variable(np.random.randint(0, 19, [10, 10, 1]).astype("int64")) token_inp = fluid.dygraph.to_variable(np.random.randint(0, 19, [10, 10]).astype("int64"))
pos_inp = fluid.dygraph.to_variable(np.random.randint(0, 19, [10, 10, 1]).astype("int64")) pos_inp = fluid.dygraph.to_variable(np.random.randint(0, 19, [10, 10]).astype("int64"))
type_inp = fluid.dygraph.to_variable(np.random.randint(0, 19, [10, 10, 1]).astype("int64")) type_inp = fluid.dygraph.to_variable(np.random.randint(0, 19, [10, 10]).astype("int64"))
turn_inp = fluid.dygraph.to_variable(np.random.randint(0, 19, [10, 10, 1]).astype("int64")) turn_inp = fluid.dygraph.to_variable(np.random.randint(0, 19, [10, 10]).astype("int64"))
out = model(token_inp, pos_inp, type_inp, turn_inp) out = model(token_inp, pos_inp, type_inp, turn_inp)
print(out) print(out)
......
...@@ -99,7 +99,6 @@ def main(): ...@@ -99,7 +99,6 @@ def main():
test_loader = DataLoader(test_dataset, hparams.Trainer, collate_fn=collate_fn, is_test=hparams.do_infer) test_loader = DataLoader(test_dataset, hparams.Trainer, collate_fn=collate_fn, is_test=hparams.do_infer)
def to_tensor(array): def to_tensor(array):
array = np.expand_dims(array, -1)
return fluid.dygraph.to_variable(array) return fluid.dygraph.to_variable(array)
if hparams.use_data_distributed: if hparams.use_data_distributed:
......
...@@ -273,7 +273,6 @@ def create_batch(batch): ...@@ -273,7 +273,6 @@ def create_batch(batch):
x_batch = np.array( x_batch = np.array(
[_pad(x[0], max_input_len) for x in batch], dtype=np.int64) [_pad(x[0], max_input_len) for x in batch], dtype=np.int64)
x_batch = np.expand_dims(x_batch, axis=-1)
mel_batch = np.array( mel_batch = np.array(
[_pad_2d( [_pad_2d(
...@@ -318,7 +317,7 @@ def create_batch(batch): ...@@ -318,7 +317,7 @@ def create_batch(batch):
done = np.expand_dims(np.expand_dims(done, axis=1), axis=1) done = np.expand_dims(np.expand_dims(done, axis=1), axis=1)
if multi_speaker: if multi_speaker:
speaker_ids = np.expand_dims(np.array([x[3] for x in batch]), axis=-1) speaker_ids = np.array([x[3] for x in batch])
return (x_batch, input_lengths, mel_batch, y_batch, text_positions, return (x_batch, input_lengths, mel_batch, y_batch, text_positions,
frame_positions, done, target_lengths, speaker_ids) frame_positions, done, target_lengths, speaker_ids)
else: else:
......
...@@ -206,7 +206,7 @@ class Encoder(dg.Layer): ...@@ -206,7 +206,7 @@ class Encoder(dg.Layer):
Encode text sequence. Encode text sequence.
Args: Args:
x (Variable): Shape(B, T_enc, 1), dtype: int64. The input text x (Variable): Shape(B, T_enc), dtype: int64. The input text
indices. T_enc means the timesteps of encoder input x. indices. T_enc means the timesteps of encoder input x.
speaker_embed (Variable, optional): Shape(Batch_size, speaker_dim), speaker_embed (Variable, optional): Shape(Batch_size, speaker_dim),
dtype: float32. Speaker embeddings. This arg is not None only dtype: float32. Speaker embeddings. This arg is not None only
...@@ -1228,7 +1228,7 @@ class DeepVoiceTTS(dg.Layer): ...@@ -1228,7 +1228,7 @@ class DeepVoiceTTS(dg.Layer):
valid lengths for each example in text_sequences. valid lengths for each example in text_sequences.
mel_inputs (Variable): Shape(B, C_mel, 1, T_mel), ground truth mel_inputs (Variable): Shape(B, C_mel, 1, T_mel), ground truth
mel-spectrogram, which is used as decoder inputs when training. mel-spectrogram, which is used as decoder inputs when training.
speaker_indices (Variable, optional): Shape(Batch_size, 1), speaker_indices (Variable, optional): Shape(Batch_size),
dtype: int64. Speaker index for each example. This arg is not dtype: int64. Speaker index for each example. This arg is not
None only when the model is a multispeaker model. None only when the model is a multispeaker model.
text_positions (Variable): Shape(B, T_enc, 1), dtype: int64. text_positions (Variable): Shape(B, T_enc, 1), dtype: int64.
......
...@@ -433,7 +433,7 @@ class OCRAttention(fluid.dygraph.Layer): ...@@ -433,7 +433,7 @@ class OCRAttention(fluid.dygraph.Layer):
decoder_boot = self.fc(backward_first) decoder_boot = self.fc(backward_first)
label_in = fluid.layers.reshape(label_in, [-1, 1], inplace=False) label_in = fluid.layers.reshape(label_in, [-1], inplace=False)
trg_embedding = self.embedding(label_in) trg_embedding = self.embedding(label_in)
trg_embedding = fluid.layers.reshape( trg_embedding = fluid.layers.reshape(
......
...@@ -360,7 +360,7 @@ def train_ptb_lm(): ...@@ -360,7 +360,7 @@ def train_ptb_lm():
train_data_iter = reader.get_data_iter(data, batch_size, num_steps) train_data_iter = reader.get_data_iter(data, batch_size, num_steps)
for batch_id, batch in enumerate(train_data_iter): for batch_id, batch in enumerate(train_data_iter):
x_data, y_data = batch x_data, y_data = batch
x_data = x_data.reshape((-1, num_steps, 1)) x_data = x_data.reshape((-1, num_steps))
y_data = y_data.reshape((-1, 1)) y_data = y_data.reshape((-1, 1))
x = to_variable(x_data) x = to_variable(x_data)
y = to_variable(y_data) y = to_variable(y_data)
...@@ -399,7 +399,7 @@ def train_ptb_lm(): ...@@ -399,7 +399,7 @@ def train_ptb_lm():
start_time = time.time() start_time = time.time()
for batch_id, batch in enumerate(train_data_iter): for batch_id, batch in enumerate(train_data_iter):
x_data, y_data = batch x_data, y_data = batch
x_data = x_data.reshape((-1, num_steps, 1)) x_data = x_data.reshape((-1, num_steps))
y_data = y_data.reshape((-1, 1)) y_data = y_data.reshape((-1, 1))
x = to_variable(x_data) x = to_variable(x_data)
y = to_variable(y_data) y = to_variable(y_data)
......
...@@ -162,7 +162,7 @@ def train(): ...@@ -162,7 +162,7 @@ def train():
'constant', 'constant',
constant_values=(args.vocab_size)) constant_values=(args.vocab_size))
for x in data for x in data
]).astype('int64').reshape(-1, 1)) ]).astype('int64').reshape(-1))
label = to_variable( label = to_variable(
np.array([x[1] for x in data]).astype('int64').reshape( np.array([x[1] for x in data]).astype('int64').reshape(
args.batch_size, 1)) args.batch_size, 1))
...@@ -206,7 +206,7 @@ def train(): ...@@ -206,7 +206,7 @@ def train():
eval_label = to_variable( eval_label = to_variable(
np.array([x[1] for x in eval_data]).astype( np.array([x[1] for x in eval_data]).astype(
'int64').reshape(args.batch_size, 1)) 'int64').reshape(args.batch_size, 1))
eval_doc = to_variable(eval_np_doc.reshape(-1, 1)) eval_doc = to_variable(eval_np_doc.reshape(-1))
eval_avg_cost, eval_prediction, eval_acc = model( eval_avg_cost, eval_prediction, eval_acc = model(
eval_doc, eval_label) eval_doc, eval_label)
eval_np_mask = ( eval_np_mask = (
......
...@@ -114,7 +114,7 @@ class CNN(fluid.dygraph.Layer): ...@@ -114,7 +114,7 @@ class CNN(fluid.dygraph.Layer):
def forward(self, inputs, label=None): def forward(self, inputs, label=None):
emb = self.embedding(inputs) emb = self.embedding(inputs)
o_np_mask = (inputs.numpy() != self.dict_dim).astype('float32') o_np_mask = (np.expand_dims(inputs.numpy(), -1) != self.dict_dim).astype('float32')
mask_emb = fluid.layers.expand( mask_emb = fluid.layers.expand(
to_variable(o_np_mask), [1, self.hid_dim]) to_variable(o_np_mask), [1, self.hid_dim])
emb = emb * mask_emb emb = emb * mask_emb
...@@ -155,7 +155,7 @@ class BOW(fluid.dygraph.Layer): ...@@ -155,7 +155,7 @@ class BOW(fluid.dygraph.Layer):
def forward(self, inputs, label=None): def forward(self, inputs, label=None):
emb = self.embedding(inputs) emb = self.embedding(inputs)
o_np_mask = (inputs.numpy() != self.dict_dim).astype('float32') o_np_mask = (np.expand_dims(inputs.numpy(), -1) != self.dict_dim).astype('float32')
mask_emb = fluid.layers.expand( mask_emb = fluid.layers.expand(
to_variable(o_np_mask), [1, self.hid_dim]) to_variable(o_np_mask), [1, self.hid_dim])
emb = emb * mask_emb emb = emb * mask_emb
...@@ -205,7 +205,7 @@ class GRU(fluid.dygraph.Layer): ...@@ -205,7 +205,7 @@ class GRU(fluid.dygraph.Layer):
def forward(self, inputs, label=None): def forward(self, inputs, label=None):
emb = self.embedding(inputs) emb = self.embedding(inputs)
o_np_mask = to_variable( o_np_mask = to_variable(
inputs.numpy() != self.dict_dim).astype('float32') np.expand_dims(inputs.numpy(), -1) != self.dict_dim).astype('float32')
mask_emb = fluid.layers.expand( mask_emb = fluid.layers.expand(
to_variable(o_np_mask), [1, self.hid_dim]) to_variable(o_np_mask), [1, self.hid_dim])
emb = emb * mask_emb emb = emb * mask_emb
...@@ -258,7 +258,7 @@ class BiGRU(fluid.dygraph.Layer): ...@@ -258,7 +258,7 @@ class BiGRU(fluid.dygraph.Layer):
def forward(self, inputs, label=None): def forward(self, inputs, label=None):
emb = self.embedding(inputs) emb = self.embedding(inputs)
o_np_mask = to_variable( o_np_mask = to_variable(
inputs.numpy() != self.dict_dim).astype('float32') np.expand_dims(inputs.numpy(), -1) != self.dict_dim).astype('float32')
mask_emb = fluid.layers.expand( mask_emb = fluid.layers.expand(
to_variable(o_np_mask), [1, self.hid_dim]) to_variable(o_np_mask), [1, self.hid_dim])
emb = emb * mask_emb emb = emb * mask_emb
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册