diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 775429e0c0657221af8950a5acb5829be3cfde88..926b92da6445a5b6ed5cef8af9d05879ac53c296 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -75,15 +75,15 @@ paddle.fluid.layers.fc (ArgSpec(args=['input', 'size', 'num_flatten_dims', 'para paddle.fluid.layers.embedding (ArgSpec(args=['input', 'size', 'is_sparse', 'is_distributed', 'padding_idx', 'param_attr', 'dtype'], varargs=None, keywords=None, defaults=(False, False, None, None, 'float32')), ('document', '89c2c55a0b0656b106064048e068e77a')) paddle.fluid.layers.dynamic_lstm (ArgSpec(args=['input', 'size', 'h_0', 'c_0', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'float32', None)), ('document', 'dfbb624f85015df29e994ca6999e8ff6')) paddle.fluid.layers.dynamic_lstmp (ArgSpec(args=['input', 'size', 'proj_size', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'proj_activation', 'dtype', 'name', 'h_0', 'c_0', 'cell_clip', 'proj_clip'], varargs=None, keywords=None, defaults=(None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'tanh', 'float32', None, None, None, None, None)), ('document', 'b4b608b986eb9617aa0525e1be21d32d')) -paddle.fluid.layers.dynamic_gru (ArgSpec(args=['input', 'size', 'param_attr', 'bias_attr', 'is_reverse', 'gate_activation', 'candidate_activation', 'h_0', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, False, 'sigmoid', 'tanh', None, False)), ('document', '4ec4845fd7d991bcac822f8b0dfc101f')) -paddle.fluid.layers.gru_unit (ArgSpec(args=['input', 'hidden', 'size', 'param_attr', 'bias_attr', 'activation', 'gate_activation', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, 'tanh', 'sigmoid', False)), ('document', 'e0e2439f7af069b57badca18a6ba60b8')) +paddle.fluid.layers.dynamic_gru (ArgSpec(args=['input', 'size', 'param_attr', 'bias_attr', 'is_reverse', 'gate_activation', 'candidate_activation', 'h_0', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, False, 'sigmoid', 'tanh', None, False)), ('document', '83617c165827e030636c80486d5de6f3')) +paddle.fluid.layers.gru_unit (ArgSpec(args=['input', 'hidden', 'size', 'param_attr', 'bias_attr', 'activation', 'gate_activation', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, 'tanh', 'sigmoid', False)), ('document', '33974b9bfa69f2f1eb85e6f956dff04e')) paddle.fluid.layers.linear_chain_crf (ArgSpec(args=['input', 'label', 'param_attr'], varargs=None, keywords=None, defaults=(None,)), ('document', '34f96be41684b0959897a9e735997e20')) paddle.fluid.layers.crf_decoding (ArgSpec(args=['input', 'param_attr', 'label'], varargs=None, keywords=None, defaults=(None,)), ('document', '462ddf2435e3392334e0c05ae57a01c4')) paddle.fluid.layers.cos_sim (ArgSpec(args=['X', 'Y'], varargs=None, keywords=None, defaults=None), ('document', 'cefab7c23ee5582727e8b22dffbafac8')) paddle.fluid.layers.cross_entropy (ArgSpec(args=['input', 'label', 'soft_label', 'ignore_index'], varargs=None, keywords=None, defaults=(False, -100)), ('document', '535f1f6213dd7ca0fe5ed7cb4718c0e3')) paddle.fluid.layers.bpr_loss (ArgSpec(args=['input', 'label', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6263dfdeb6c670fa0922c9cbc8fb1bf4')) paddle.fluid.layers.square_error_cost (ArgSpec(args=['input', 'label'], varargs=None, keywords=None, defaults=None), ('document', 'f273bb26833ee88b349c4b8083e1dc67')) -paddle.fluid.layers.chunk_eval (ArgSpec(args=['input', 'label', 'chunk_scheme', 'num_chunk_types', 'excluded_chunk_types'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ee152a7ba3036e7b9ede9184545179b4')) +paddle.fluid.layers.chunk_eval (ArgSpec(args=['input', 'label', 'chunk_scheme', 'num_chunk_types', 'excluded_chunk_types'], varargs=None, keywords=None, defaults=(None,)), ('document', '5aa25d023acea1fb49a0de56be86990b')) paddle.fluid.layers.sequence_conv (ArgSpec(args=['input', 'num_filters', 'filter_size', 'filter_stride', 'padding', 'bias_attr', 'param_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(3, 1, None, None, None, None, None)), ('document', 'b6543768e1afaa2ecb869709d6e9c7e2')) paddle.fluid.layers.conv2d (ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None)), ('document', '8ca6121acd6d23cd8806a93f493c2e17')) paddle.fluid.layers.conv3d (ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None)), ('document', '37042620f9bd3a2da6e5d3138b2f724b')) @@ -96,14 +96,14 @@ paddle.fluid.layers.adaptive_pool2d (ArgSpec(args=['input', 'pool_size', 'pool_t paddle.fluid.layers.adaptive_pool3d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)), ('document', '55db6ae7275fb9678a6814aebab81a9c')) paddle.fluid.layers.batch_norm (ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False, False)), ('document', '581f9f99cd7f4b0cab9e0aad5fa0ea24')) paddle.fluid.layers.data_norm (ArgSpec(args=['input', 'act', 'epsilon', 'param_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var'], varargs=None, keywords=None, defaults=(None, 1e-05, None, 'NCHW', False, None, None, None, False)), ('document', '2460b30fb87037555208fa8ac6fc1787')) -paddle.fluid.layers.beam_search_decode (ArgSpec(args=['ids', 'scores', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b0b8d53821716cd50c42e09b593f3feb')) +paddle.fluid.layers.beam_search_decode (ArgSpec(args=['ids', 'scores', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '83e08f21af41ac8bac37aeab1f86fdd0')) paddle.fluid.layers.conv2d_transpose (ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None)), ('document', '03993955ab1e6d3044c44e6f17fc85e9')) paddle.fluid.layers.conv3d_transpose (ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None)), ('document', 'ec113c6a3686ac94f8fccd1a7953d445')) paddle.fluid.layers.sequence_expand (ArgSpec(args=['x', 'y', 'ref_level', 'name'], varargs=None, keywords=None, defaults=(-1, None)), ('document', '79c375214fa427faac504043d162dae9')) paddle.fluid.layers.sequence_expand_as (ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '9d2611f84ab364c5da545e6a82f1770a')) paddle.fluid.layers.sequence_pad (ArgSpec(args=['x', 'pad_value', 'maxlen', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6a1adf3067b20f6e4bcb354d71c19184')) paddle.fluid.layers.sequence_unpad (ArgSpec(args=['x', 'length', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd12803c903c99aa36ec03aaac5f0cc5b')) -paddle.fluid.layers.lstm_unit (ArgSpec(args=['x_t', 'hidden_t_prev', 'cell_t_prev', 'forget_bias', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(0.0, None, None, None)), ('document', '027723966f3ef0d7bc598f22287a96cc')) +paddle.fluid.layers.lstm_unit (ArgSpec(args=['x_t', 'hidden_t_prev', 'cell_t_prev', 'forget_bias', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(0.0, None, None, None)), ('document', '409d1c2ca874a22511258175649d2b7f')) paddle.fluid.layers.reduce_sum (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'b69998ce3ff4980fb21da0df05565f1b')) paddle.fluid.layers.reduce_mean (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'd4d80dd98a1a5839f41eeb3a0f85f370')) paddle.fluid.layers.reduce_max (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', '66a622db727551761ce4eb73eaa7f6a4')) @@ -128,7 +128,7 @@ paddle.fluid.layers.im2sequence (ArgSpec(args=['input', 'filter_size', 'stride', paddle.fluid.layers.nce (ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0, False)), ('document', '32b3c442da0f3df682b5fcac10468116')) paddle.fluid.layers.sampled_softmax_with_cross_entropy (ArgSpec(args=['logits', 'label', 'num_samples', 'num_true', 'remove_accidental_hits', 'use_customized_samples', 'customized_samples', 'customized_probabilities', 'seed'], varargs=None, keywords=None, defaults=(1, True, False, None, None, 0)), ('document', '5db30b8a74e8c93687943a3e8d221da0')) paddle.fluid.layers.hsigmoid (ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name', 'path_table', 'path_code', 'is_custom', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, False, False)), ('document', '80641ee6810b1cdc3fd6e14fc89ecc9d')) -paddle.fluid.layers.beam_search (ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'is_accumulated', 'name', 'return_parent_idx'], varargs=None, keywords=None, defaults=(0, True, None, False)), ('document', 'b350b9a30a18e7efd7e1bb740eef6996')) +paddle.fluid.layers.beam_search (ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'is_accumulated', 'name', 'return_parent_idx'], varargs=None, keywords=None, defaults=(0, True, None, False)), ('document', '1270395ce97a4e1b556104abbb14f096')) paddle.fluid.layers.row_conv (ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None)), ('document', '17485788fffe4e2d36dc58c2ac8d174e')) paddle.fluid.layers.multiplex (ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None), ('document', '2c4d1ae83da6ed35e3b36ba1b3b51d23')) paddle.fluid.layers.layer_norm (ArgSpec(args=['input', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None, None)), ('document', 'de6a906950bae9f3c245cb744d22b94e')) @@ -218,7 +218,7 @@ paddle.fluid.layers.similarity_focus (ArgSpec(args=['input', 'axis', 'indexes', paddle.fluid.layers.hash (ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None)), ('document', '97bf4353bb046a5629308a38f98ac204')) paddle.fluid.layers.grid_sampler (ArgSpec(args=['x', 'grid', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '5d16663e096d7f04954c70ce1cc5e195')) paddle.fluid.layers.log_loss (ArgSpec(args=['input', 'label', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(0.0001, None)), ('document', 'af541e9263be61ce0e40df58d1b69294')) -paddle.fluid.layers.add_position_encoding (ArgSpec(args=['input', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '4b9c2e8af5817937d831820874b5aa77')) +paddle.fluid.layers.add_position_encoding (ArgSpec(args=['input', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e399f9436fed5f7ff480d8532e42c937')) paddle.fluid.layers.bilinear_tensor_product (ArgSpec(args=['x', 'y', 'size', 'act', 'name', 'param_attr', 'bias_attr'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'cd0bd55ef1e1762aca25ec972d34d378')) paddle.fluid.layers.merge_selected_rows (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'dc63315b84f591ac79ecca0c3632027a')) paddle.fluid.layers.get_tensor_from_selected_rows (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '7ffc849e71f31dfe29030ff94e662de6')) @@ -440,7 +440,7 @@ paddle.fluid.transpiler.DistributeTranspilerConfig.__init__ paddle.fluid.nets.simple_img_conv_pool (ArgSpec(args=['input', 'num_filters', 'filter_size', 'pool_size', 'pool_stride', 'pool_padding', 'pool_type', 'global_pooling', 'conv_stride', 'conv_padding', 'conv_dilation', 'conv_groups', 'param_attr', 'bias_attr', 'act', 'use_cudnn'], varargs=None, keywords=None, defaults=(0, 'max', False, 1, 0, 1, 1, None, None, None, True)), ('document', '13f01ff80e8dfbd3427d90cf49bc62eb')) paddle.fluid.nets.sequence_conv_pool (ArgSpec(args=['input', 'num_filters', 'filter_size', 'param_attr', 'act', 'pool_type', 'bias_attr'], varargs=None, keywords=None, defaults=(None, 'sigmoid', 'max', None)), ('document', 'd6a1e527b53f5cc15594fee307dfc5cf')) paddle.fluid.nets.glu (ArgSpec(args=['input', 'dim'], varargs=None, keywords=None, defaults=(-1,)), ('document', '6486b2595300fc3305b5a1f0ac363dce')) -paddle.fluid.nets.scaled_dot_product_attention (ArgSpec(args=['queries', 'keys', 'values', 'num_heads', 'dropout_rate'], varargs=None, keywords=None, defaults=(1, 0.0)), ('document', '921714c9bfb351b41403418265393203')) +paddle.fluid.nets.scaled_dot_product_attention (ArgSpec(args=['queries', 'keys', 'values', 'num_heads', 'dropout_rate'], varargs=None, keywords=None, defaults=(1, 0.0)), ('document', 'b1a07a0000eb9103e3a143ca8c13de5b')) paddle.fluid.nets.img_conv_group (ArgSpec(args=['input', 'conv_num_filter', 'pool_size', 'conv_padding', 'conv_filter_size', 'conv_act', 'param_attr', 'conv_with_batchnorm', 'conv_batchnorm_drop_rate', 'pool_stride', 'pool_type', 'use_cudnn'], varargs=None, keywords=None, defaults=(1, 3, None, None, False, 0.0, 1, 'max', True)), ('document', '5178bc1b4d302192597a5efbae13d902')) paddle.fluid.optimizer.SGDOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'regularization', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.SGDOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871')) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 797b541134cdd0bc76326fab7b2666337a09ade3..65f9a2a212c0a256c7d3d2269675808691ce8fd8 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -1041,6 +1041,8 @@ def dynamic_gru(input, .. code-block:: python + import paddle.fluid as fluid + dict_dim, emb_dim = 128, 64 data = fluid.layers.data(name='sequence', shape=[1], dtype='int32', lod_level=1) @@ -1178,10 +1180,17 @@ def gru_unit(input, .. code-block:: python - # assuming we have x_t_data and prev_hidden of size=10 - x_t = fluid.layers.fc(input=x_t_data, size=30) - hidden_val, r_h_val, gate_val = fluid.layers.gru_unit(input=x_t, - hidden = prev_hidden) + import paddle.fluid as fluid + + dict_dim, emb_dim = 128, 64 + data = fluid.layers.data(name='step_data', shape=[1], dtype='int32') + emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim]) + hidden_dim = 512 + x = fluid.layers.fc(input=emb, size=hidden_dim * 3) + pre_hidden = fluid.layers.data( + name='pre_hidden', shape=[hidden_dim], dtype='float32') + hidden = fluid.layers.gru_unit( + input=x, hidden=pre_hidden, size=hidden_dim * 3) """ activation_dict = dict( @@ -1724,10 +1733,21 @@ def chunk_eval(input, Examples: .. code-block:: python + import paddle.fluid as fluid + + dict_size = 10000 + label_dict_len = 7 + sequence = fluid.layers.data( + name='id', shape=[1], lod_level=1, dtype='int64') + embedding = fluid.layers.embedding( + input=sequence, size=[dict_size, 512]) + hidden = fluid.layers.fc(input=embedding, size=512) + label = fluid.layers.data( + name='label', shape=[1], lod_level=1, dtype='int32') crf = fluid.layers.linear_chain_crf( - input=hidden, label=label, param_attr=ParamAttr(name="crfw")) + input=hidden, label=label, param_attr=fluid.ParamAttr(name="crfw")) crf_decode = fluid.layers.crf_decoding( - input=hidden, param_attr=ParamAttr(name="crfw")) + input=hidden, param_attr=fluid.ParamAttr(name="crfw")) fluid.layers.chunk_eval( input=crf_decode, label=label, @@ -4281,16 +4301,25 @@ def beam_search(pre_ids, Examples: .. code-block:: python + import paddle.fluid as fluid + # Suppose `probs` contains predicted results from the computation # cell and `pre_ids` and `pre_scores` is the output of beam_search # at previous step. - topk_scores, topk_indices = layers.topk(probs, k=beam_size) - accu_scores = layers.elementwise_add( - x=layers.log(x=topk_scores)), - y=layers.reshape( - pre_scores, shape=[-1]), + beam_size = 4 + end_id = 1 + pre_ids = fluid.layers.data( + name='pre_id', shape=[1], lod_level=2, dtype='int64') + pre_scores = fluid.layers.data( + name='pre_scores', shape=[1], lod_level=2, dtype='float32') + probs = fluid.layers.data( + name='probs', shape=[10000], dtype='float32') + topk_scores, topk_indices = fluid.layers.topk(probs, k=beam_size) + accu_scores = fluid.layers.elementwise_add( + x=fluid.layers.log(x=topk_scores), + y=fluid.layers.reshape(pre_scores, shape=[-1]), axis=0) - selected_ids, selected_scores = layers.beam_search( + selected_ids, selected_scores = fluid.layers.beam_search( pre_ids=pre_ids, pre_scores=pre_scores, ids=topk_indices, @@ -4364,9 +4393,13 @@ def beam_search_decode(ids, scores, beam_size, end_id, name=None): Examples: .. code-block:: python + import paddle.fluid as fluid + # Suppose `ids` and `scores` are LodTensorArray variables reserving # the selected ids and scores of all steps - finished_ids, finished_scores = layers.beam_search_decode( + ids = fluid.layers.create_array(dtype='int64') + scores = fluid.layers.create_array(dtype='float32') + finished_ids, finished_scores = fluid.layers.beam_search_decode( ids, scores, beam_size=5, end_id=0) """ helper = LayerHelper('beam_search_decode', **locals()) @@ -4464,12 +4497,19 @@ def lstm_unit(x_t, .. code-block:: python - x_t = fluid.layers.fc(input=x_t_data, size=10) - prev_hidden = fluid.layers.fc(input=prev_hidden_data, size=30) - prev_cell = fluid.layers.fc(input=prev_cell_data, size=30) - hidden_value, cell_value = fluid.layers.lstm_unit(x_t=x_t, - hidden_t_prev=prev_hidden, - cell_t_prev=prev_cell) + import paddle.fluid as fluid + + dict_dim, emb_dim, hidden_dim = 128, 64, 512 + data = fluid.layers.data(name='step_data', shape=[1], dtype='int32') + x = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim]) + pre_hidden = fluid.layers.data( + name='pre_hidden', shape=[hidden_dim], dtype='float32') + pre_cell = fluid.layers.data( + name='pre_cell', shape=[hidden_dim], dtype='float32') + hidden = fluid.layers.lstm_unit( + x_t=x, + hidden_t_prev=pre_hidden, + cell_t_prev=pre_cell) """ helper = LayerHelper('lstm_unit', **locals()) @@ -10555,7 +10595,15 @@ def add_position_encoding(input, alpha, beta, name=None): Examples: .. code-block:: python - position_tensor = fluid.layers.add_position_encoding(input=tensor) + import paddle.fluid as fluid + + tensor = fluid.layers.data( + name='tensor', + shape=[32, 64, 512], + dtype='float32', + append_batch_size=False) + position_tensor = fluid.layers.add_position_encoding( + input=tensor, alpha=1.0, beta=1.0) """ helper = LayerHelper('add_position_encoding', **locals()) diff --git a/python/paddle/fluid/nets.py b/python/paddle/fluid/nets.py index 5e511ed2eb9ffaeada45046dbe6c2b7c15ae6d16..20fbd079f76de498a092a7b0ed8926e3f13d7bb1 100644 --- a/python/paddle/fluid/nets.py +++ b/python/paddle/fluid/nets.py @@ -390,6 +390,8 @@ def scaled_dot_product_attention(queries, Examples: .. code-block:: python + import paddle.fluid as fluid + queries = fluid.layers.data(name="queries", shape=[3, 5, 9], dtype="float32", @@ -516,7 +518,7 @@ def scaled_dot_product_attention(queries, key_dim_per_head = keys.shape[-1] // num_heads scaled_q = layers.scale(x=q, scale=key_dim_per_head**-0.5) - product = layers.matmul(x=k, y=scaled_q, transpose_y=True) + product = layers.matmul(x=scaled_q, y=k, transpose_y=True) weights = layers.reshape( x=layers.reshape(