diff --git a/paddle/fluid/operators/lookup_table_v2_op.cc b/paddle/fluid/operators/lookup_table_v2_op.cc
index 4aadd4b8c0015368dcea7f0787271413bcdee172..b336735d96e9d90eb94a66dc7cfb8402d9da67c9 100644
--- a/paddle/fluid/operators/lookup_table_v2_op.cc
+++ b/paddle/fluid/operators/lookup_table_v2_op.cc
@@ -71,8 +71,7 @@ class LookupTableV2OpMaker : public framework::OpProtoAndCheckerMaker {
              "which is a learnable parameter.");
     AddInput("Ids",
              "An input with type int64 "
-             "contains the ids to be looked up in W. "
-             "The last dimension size must be 1.");
+             "contains the ids to be looked up in W.");
     AddOutput("Out", "The lookup results, which have the same type as W.");
     AddAttr<bool>("is_sparse",
                   "(boolean, default false) "
diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py
index 3b32fe591e4d67d29ad89fa05fcc99c185c95f99..33641f6c26ed486a06cb8b0bff19b27aff81349e 100644
--- a/python/paddle/fluid/dygraph/nn.py
+++ b/python/paddle/fluid/dygraph/nn.py
@@ -1361,11 +1360,10 @@ class Embedding(layers.Layer):
     It automatically constructs a 2D embedding matrix based on the
     input :attr:`size` (vocab_size, emb_size) and :attr:`dtype` .
 
-    This layer requires the last dimension of Tensor shape must be equal to 1. The shape
-    of output Tensor is generated by replacing the last dimension of the input Tensor shape
-    with emb_size.
+    The shape of output Tensor is generated by appending an emb_size dimension to the
+    last dimension of the input Tensor shape.
 
-    The id in :attr:`input` must satisfy :math:`0 =< id < size[0]` ,
+    **Note:** The id in :attr:`input` must satisfy :math:`0 =< id < size[0]` ,
     otherwise the program will throw an exception and exit.
 
     .. code-block:: text
@@ -1373,8 +1372,8 @@ class Embedding(layers.Layer):
         Case 1:
 
         input is a Tensor. padding_idx = -1
-            input.data = [[[1], [3]], [[2], [4]], [[4], [127]]]
-            input.shape = [3, 2, 1]
+            input.data = [[1, 3], [2, 4], [4, 127]]
+            input.shape = [3, 2]
         Given size = [128, 16]
         output is a Tensor:
             out.shape = [3, 2, 16]
@@ -1431,7 +1430,8 @@ class Embedding(layers.Layer):
          import numpy as np
 
          # example 1
-         inp_word = np.array([[[1]]]).astype('int64')
+         inp_word = np.array([[2, 3, 5], [4, 2, 1]]).astype('int64')
+         inp_word.shape  # [2, 3]
          dict_size = 20
          with fluid.dygraph.guard():
              emb = fluid.dygraph.Embedding(
@@ -1440,6 +1440,7 @@ class Embedding(layers.Layer):
                  param_attr='emb.w',
                  is_sparse=False)
              static_rlt3 = emb(base.to_variable(inp_word))
+             static_rlt3.shape  # [2, 3, 32]
 
          # example 2: load custom or pre-trained word vectors
          weight_data = np.random.random(size=(128, 100))  # word vectors with numpy format
@@ -1495,7 +1496,7 @@ class Embedding(layers.Layer):
     def forward(self, input):
         out = self._helper.create_variable_for_type_inference(self._dtype)
         self._helper.append_op(
-            type='lookup_table',
+            type='lookup_table_v2',
             inputs={'Ids': input,
                     'W': self._w},
             outputs={'Out': out},
@@ -1883,7 +1884,7 @@ class NCE(layers.Layer):
             window_size = 5
             dict_size = 20
             label_word = int(window_size // 2) + 1
-            inp_word = np.array([[[1]], [[2]], [[3]], [[4]], [[5]]]).astype('int64')
+            inp_word = np.array([[1], [2], [3], [4], [5]]).astype('int64')
             nid_freq_arr = np.random.dirichlet(np.ones(20) * 1000).astype('float32')
 
             with fluid.dygraph.guard():
@@ -1915,7 +1916,8 @@ class NCE(layers.Layer):
                              param_attr='nce.w',
                              bias_attr='nce.b')
 
-                nce_loss3 = nce(embs3, words[label_word])
+                wl = fluid.layers.unsqueeze(words[label_word], axes=[0])
+                nce_loss3 = nce(embs3, wl)
 
     """
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py
index 22bd2e55d28342b81475fd2832a2a407382e2d32..73768340003d95c1a220854dd584d5c85787ea90 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py
@@ -395,7 +395,7 @@ class OCRAttention(fluid.dygraph.Layer):
         backward_first = fluid.layers.reshape(
             backward_first, [-1, backward_first.shape[2]], inplace=False)
         decoder_boot = self.fc(backward_first)
-        label_in = fluid.layers.reshape(label_in, [-1, 1], inplace=False)
+        label_in = fluid.layers.reshape(label_in, [-1], inplace=False)
         trg_embedding = self.embedding(label_in)
 
         trg_embedding = fluid.layers.reshape(
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
index 1ef318194f3b78e50c15c5738f4a1ca4ba84e16e..472fae6e49b727721f40f2bf1d26518ab61262c8 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
@@ -254,7 +254,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
             for i in range(batch_num):
                 x_data = np.arange(12).reshape(4, 3).astype('int64')
                 y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
-                x_data = x_data.reshape((-1, num_steps, 1))
                 y_data = y_data.reshape((-1, 1))
                 init_hidden_data = np.zeros(
                     (num_layers, batch_size, hidden_size), dtype='float32')
@@ -313,7 +312,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
             ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
             sgd = SGDOptimizer(learning_rate=1e-3)
             x = fluid.layers.data(
-                name="x", shape=[-1, num_steps, 1], dtype='int64')
+                name="x", shape=[-1, num_steps], dtype='int64')
             y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32')
             init_hidden = fluid.layers.data(
                 name="init_hidden", shape=[1], dtype='float32')
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py
index 56fa716110c20a09f92a14757aa3aed1d71de9a4..c15e893c7d51cc27c85db06ba5db02baa02e03a6 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py
@@ -246,7 +246,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
             for i in range(batch_num):
                 x_data = np.arange(12).reshape(4, 3).astype('int64')
                 y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
-                x_data = x_data.reshape((-1, num_steps, 1))
                 y_data = y_data.reshape((-1, 1))
                 init_hidden_data = np.zeros(
                     (num_layers, batch_size, hidden_size), dtype='float32')
@@ -328,7 +327,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
             for i in range(batch_num):
                 x_data = np.arange(12).reshape(4, 3).astype('int64')
                 y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
-                x_data = x_data.reshape((-1, num_steps, 1))
                 y_data = y_data.reshape((-1, 1))
                 init_hidden_data = np.zeros(
                     (num_layers, batch_size, hidden_size), dtype='float32')
@@ -433,7 +431,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
             for i in range(batch_num):
                 x_data = np.arange(12).reshape(4, 3).astype('int64')
                 y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
-                x_data = x_data.reshape((-1, num_steps, 1))
                 y_data = y_data.reshape((-1, 1))
                 init_hidden_data = np.zeros(
                     (num_layers, batch_size, hidden_size), dtype='float32')
@@ -537,7 +534,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
             for i in range(batch_num):
                 x_data = np.arange(12).reshape(4, 3).astype('int64')
                 y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
-                x_data = x_data.reshape((-1, num_steps, 1))
                 y_data = y_data.reshape((-1, 1))
                 init_hidden_data = np.zeros(
                     (num_layers, batch_size, hidden_size), dtype='float32')
@@ -652,7 +648,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
             for i in range(1):
                 x_data = np.arange(12).reshape(4, 3).astype('int64')
                 y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
-                x_data = x_data.reshape((-1, num_steps, 1))
                 y_data = y_data.reshape((-1, 1))
                 init_hidden_data = np.zeros(
                     (num_layers, batch_size, hidden_size), dtype='float32')
@@ -745,7 +740,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
             for i in range(1):
                 x_data = np.arange(12).reshape(4, 3).astype('int64')
                 y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
-                x_data = x_data.reshape((-1, num_steps, 1))
                 y_data = y_data.reshape((-1, 1))
                 init_hidden_data = np.zeros(
                     (num_layers, batch_size, hidden_size), dtype='float32')
@@ -846,7 +840,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
             for i in range(1):
                 x_data = np.arange(12).reshape(4, 3).astype('int64')
                 y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
-                x_data = x_data.reshape((-1, num_steps, 1))
                 y_data = y_data.reshape((-1, 1))
                 init_hidden_data = np.zeros(
                     (num_layers, batch_size, hidden_size), dtype='float32')
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py
index 56c7189e31aa5ef741817fa40f28eb6f2abea4f5..89c7f596074c248a515bfcd2f902aa16746bc13a 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py
@@ -229,11 +229,11 @@ seq_len = ModelHyperParams.max_length
 # compile time.
 input_descs = {
     # The actual data shape of src_word is:
-    # [batch_size, max_src_len_in_batch, 1]
-    "src_word": [(batch_size, seq_len, 1), "int64", 2],
+    # [batch_size, max_src_len_in_batch]
+    "src_word": [(batch_size, seq_len), "int64", 2],
     # The actual data shape of src_pos is:
-    # [batch_size, max_src_len_in_batch, 1]
-    "src_pos": [(batch_size, seq_len, 1), "int64"],
+    # [batch_size, max_src_len_in_batch]
+    "src_pos": [(batch_size, seq_len), "int64"],
     # This input is used to remove attention weights on paddings in the
     # encoder.
     # The actual data shape of src_slf_attn_bias is:
@@ -241,12 +241,12 @@ input_descs = {
     "src_slf_attn_bias": [(batch_size, ModelHyperParams.n_head, seq_len,
                            seq_len), "float32"],
     # The actual data shape of trg_word is:
-    # [batch_size, max_trg_len_in_batch, 1]
-    "trg_word": [(batch_size, seq_len, 1), "int64",
+    # [batch_size, max_trg_len_in_batch]
+    "trg_word": [(batch_size, seq_len), "int64",
                  2],  # lod_level is only used in fast decoder.
     # The actual data shape of trg_pos is:
-    # [batch_size, max_trg_len_in_batch, 1]
-    "trg_pos": [(batch_size, seq_len, 1), "int64"],
+    # [batch_size, max_trg_len_in_batch]
+    "trg_pos": [(batch_size, seq_len), "int64"],
     # This input is used to remove attention weights on paddings and
     # subsequent words in the decoder.
     # The actual data shape of trg_slf_attn_bias is:
@@ -317,17 +317,17 @@ batch_num = 5
 
 np.random.seed = 90
 src_word_np = np.arange(1, TrainTaskConfig.batch_size * seq_len + 1).reshape(
-    [TrainTaskConfig.batch_size, seq_len, 1]).astype('int64')
+    [TrainTaskConfig.batch_size, seq_len]).astype('int64')
 src_pos_np = np.random.randint(
-    1, seq_len, size=(TrainTaskConfig.batch_size, seq_len, 1), dtype='int64')
+    1, seq_len, size=(TrainTaskConfig.batch_size, seq_len), dtype='int64')
 src_slf_attn_bias_np = np.random.randn(TrainTaskConfig.batch_size,
                                        ModelHyperParams.n_head, seq_len,
                                        seq_len).astype('float32')
 
 trg_word_np = np.arange(1, TrainTaskConfig.batch_size * seq_len + 1).reshape(
-    [TrainTaskConfig.batch_size, seq_len, 1]).astype('int64')
+    [TrainTaskConfig.batch_size, seq_len]).astype('int64')
 trg_pos_np = np.random.randint(
-    1, seq_len, size=(TrainTaskConfig.batch_size, seq_len, 1), dtype='int64')
+    1, seq_len, size=(TrainTaskConfig.batch_size, seq_len), dtype='int64')
 trg_slf_attn_bias_np = np.random.randn(TrainTaskConfig.batch_size,
                                        ModelHyperParams.n_head, seq_len,
                                        seq_len).astype('float32')
diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py
index a06cefef5cbe3ff43674199896cc410c029f983e..ff267698456ac689ad7221dfacba80d25568cfaf 100644
--- a/python/paddle/fluid/tests/unittests/test_layers.py
+++ b/python/paddle/fluid/tests/unittests/test_layers.py
@@ -842,7 +842,7 @@ class TestLayer(LayerTest):
         window_size = 5
         dict_size = 20
         label_word = int(window_size // 2) + 1
-        inp_word = np.array([[[1]], [[2]], [[3]], [[4]], [[5]]]).astype('int64')
+        inp_word = np.array([[1], [2], [3], [4], [5]]).astype('int64')
         nid_freq_arr = np.random.dirichlet(np.ones(20) * 1000).astype('float32')
         seed = 1
         with self.static_graph():
@@ -850,7 +850,7 @@ class TestLayer(LayerTest):
             for i in range(window_size):
                 words.append(
                     layers.data(
-                        name='word_{0}'.format(i), shape=[1], dtype='int64'))
+                        name='word_{0}'.format(i), shape=[None], dtype='int64'))
             sample_weights = layers.fill_constant(
                 shape=[5, 1], dtype='float32', value=1)
             embs = []
@@ -858,7 +858,7 @@ class TestLayer(LayerTest):
                 if i == label_word:
                     continue
 
-                emb = layers.embedding(
+                emb = fluid.embedding(
                     input=words[i],
                     size=[dict_size, 32],
                     param_attr='emb.w',
@@ -866,8 +866,9 @@ class TestLayer(LayerTest):
                 embs.append(emb)
 
             embs = layers.concat(input=embs, axis=1)
+            wl = fluid.layers.unsqueeze(words[label_word], axes=[0])
             nce_loss = layers.nce(input=embs,
-                                  label=words[label_word],
+                                  label=wl,
                                   num_total_classes=dict_size,
                                   num_neg_samples=2,
                                   sampler="custom_dist",
@@ -886,7 +887,7 @@ class TestLayer(LayerTest):
             for i in range(window_size):
                 words.append(
                     layers.data(
-                        name='word_{0}'.format(i), shape=[1], dtype='int64'))
+                        name='word_{0}'.format(i), shape=[None], dtype='int64'))
             sample_weights = layers.fill_constant(
                 shape=[5, 1], dtype='float32', value=1)
             emb = nn.Embedding(
@@ -914,7 +915,8 @@ class TestLayer(LayerTest):
                          bias_attr='nce.b',
                          sample_weight=sample_weights)
 
-            nce_loss2 = nce(embs2, words[label_word])
+            wl = fluid.layers.unsqueeze(words[label_word], axes=[0])
+            nce_loss2 = nce(embs2, wl)
             feed_dict = dict()
             for i in range(len(words)):
                 feed_dict['word_{0}'.format(i)] = inp_word[i]
@@ -953,7 +955,8 @@ class TestLayer(LayerTest):
                          bias_attr='nce.b',
                          sample_weight=sample_weights)
 
-            dy_rlt = nce(embs3, words[label_word])
+            wl = fluid.layers.unsqueeze(words[label_word], axes=[0])
+            dy_rlt = nce(embs3, wl)
             dy_rlt_value = dy_rlt.numpy()
 
         self.assertTrue(np.allclose(static_rlt2, static_rlt))
@@ -1004,14 +1007,15 @@ class TestLayer(LayerTest):
                           bias_attr='nce2.b',
                           sample_weight=sample_weights)
 
-            nce1_loss = nce1(embs3, words[label_word])
-            nce2_loss = nce2(embs3, words[label_word])
+            wl = fluid.layers.unsqueeze(words[label_word], axes=[0])
+            nce1_loss = nce1(embs3, wl)
+            nce2_loss = nce2(embs3, wl)
             self.assertFalse(
                 np.array_equal(nce1_loss.numpy(), nce2_loss.numpy()))
 
             nce2.weight.set_value(nce1.weight.numpy())
             nce2.bias.set_value(nce1.bias)
-            nce1_loss = nce1(embs3, words[label_word])
-            nce2_loss = nce2(embs3, words[label_word])
+            nce1_loss = nce1(embs3, wl)
+            nce2_loss = nce2(embs3, wl)
             self.assertTrue(
                 np.array_equal(nce1_loss.numpy(), nce2_loss.numpy()))
diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load.py b/python/paddle/fluid/tests/unittests/test_static_save_load.py
index 7cfd9bd5a2777d1b7810be3bd0a2f60572342df4..5fc2701ee2dccc54624512c5c8777e3f32c647ae 100644
--- a/python/paddle/fluid/tests/unittests/test_static_save_load.py
+++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py
@@ -240,7 +240,7 @@ class TestSaveLoadBase(unittest.TestCase):
             exe = fluid.Executor(place)
             sgd = Adam(learning_rate=1e-3)
             x = fluid.layers.data(
-                name="x", shape=[-1, num_steps, 1], dtype='int64')
+                name="x", shape=[-1, num_steps], dtype='int64')
             y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32')
             init_hidden = fluid.layers.data(
                 name="init_hidden", shape=[1], dtype='float32')
@@ -341,7 +341,7 @@ class TestSaveLoadPartial(unittest.TestCase):
             exe = fluid.Executor(place)
             sgd = Adam(learning_rate=1e-3)
             x = fluid.layers.data(
-                name="x", shape=[-1, num_steps, 1], dtype='int64')
+                name="x", shape=[-1, num_steps], dtype='int64')
             y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32')
             init_hidden = fluid.layers.data(
                 name="init_hidden", shape=[1], dtype='float32')
@@ -451,7 +451,7 @@ class TestSaveLoadSetStateDict(unittest.TestCase):
             exe = fluid.Executor(place)
             sgd = Adam(learning_rate=1e-3)
             x = fluid.layers.data(
-                name="x", shape=[-1, num_steps, 1], dtype='int64')
+                name="x", shape=[-1, num_steps], dtype='int64')
             y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32')
             init_hidden = fluid.layers.data(
                 name="init_hidden", shape=[1], dtype='float32')
@@ -552,7 +552,7 @@ class TestProgramStatePartial(unittest.TestCase):
             exe = fluid.Executor(place)
             sgd = Adam(learning_rate=1e-3)
             x = fluid.layers.data(
-                name="x", shape=[-1, num_steps, 1], dtype='int64')
+                name="x", shape=[-1, num_steps], dtype='int64')
             y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32')
             init_hidden = fluid.layers.data(
                 name="init_hidden", shape=[1], dtype='float32')
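Editor's note (illustrative only, not part of the patch): a minimal sketch of the id-shape behavior this change documents, assuming the fluid 1.6-era dygraph API used throughout the diff. It mirrors "example 1" from the updated Embedding docstring above; dict_size and the variable names are placeholders.

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.dygraph.base as base

# Ids no longer need a trailing dimension of size 1.
inp_word = np.array([[2, 3, 5], [4, 2, 1]]).astype('int64')  # shape [2, 3]
dict_size = 20

with fluid.dygraph.guard():
    emb = fluid.dygraph.Embedding(
        size=[dict_size, 32],
        param_attr='emb.w',
        is_sparse=False)
    out = emb(base.to_variable(inp_word))
    # An emb_size dimension is appended to the input shape: [2, 3] -> [2, 3, 32].
    print(out.shape)

For NCE, by contrast, the updated tests still pass the label with an extra leading dimension, which is why they call fluid.layers.unsqueeze(words[label_word], axes=[0]) before invoking the layer.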