From 1106fbbd017130a427f5282a7ef60f0ab62aaef4 Mon Sep 17 00:00:00 2001 From: zhoushiyu <31816202+wilhelmzh@users.noreply.github.com> Date: Wed, 9 Oct 2019 17:43:32 +0800 Subject: [PATCH] fix cvm and DataFeedDesc apis (#19994) * fix cvm and DataFeedDesc apis test=develop test=document_preview test=document_fix * add ref and rst tensor type test=develop test=document_preview test=document_fix * change API.spec test=develop test=ducument_fix * fix fluid.data test=develop test=document_preview test=document_fix * change API.spec test=develop test=document_preview test=document_fix * fix cvm example test=develop test=document_preview test=document_fix * change API.spec test=develop test=document_preview test=document_fix * fix cvm example test=develop test=document_fix * fix cvm example test=develop test=document_fix * change api.spec example test=develop test=document_fix --- paddle/fluid/API.spec | 6 ++--- python/paddle/fluid/data_feed_desc.py | 21 +++++++++++------- python/paddle/fluid/layers/nn.py | 32 ++++++++++++++------------- 3 files changed, 33 insertions(+), 26 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 6c0ed994e8f..5c7757f9077 100755 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -55,8 +55,8 @@ paddle.fluid.create_random_int_lodtensor (ArgSpec(args=['recursive_seq_lens', 'b paddle.fluid.DataFeedDesc ('paddle.fluid.data_feed_desc.DataFeedDesc', ('document', '43877a0d9357db94d3dbc7359cbe8c73')) paddle.fluid.DataFeedDesc.__init__ (ArgSpec(args=['self', 'proto_file'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.DataFeedDesc.desc (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '9c6615854b61caa5f0d3e6ccc5e51338')) -paddle.fluid.DataFeedDesc.set_batch_size (ArgSpec(args=['self', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', 'a34790bff4a2891713ddd644db56418d')) 
-paddle.fluid.DataFeedDesc.set_dense_slots (ArgSpec(args=['self', 'dense_slots_name'], varargs=None, keywords=None, defaults=None), ('document', 'fdd07ce63e72bed57f2c0db5bec5720f')) +paddle.fluid.DataFeedDesc.set_batch_size (ArgSpec(args=['self', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', '4e4652f098a9c7bb0a10cf623108015b')) +paddle.fluid.DataFeedDesc.set_dense_slots (ArgSpec(args=['self', 'dense_slots_name'], varargs=None, keywords=None, defaults=None), ('document', '0b7a04bf99ed8983ad7ad4fda19be061')) paddle.fluid.DataFeedDesc.set_use_slots (ArgSpec(args=['self', 'use_slots_name'], varargs=None, keywords=None, defaults=None), ('document', 'c23a79dfa04edd014b477bd4b183da06')) paddle.fluid.CompiledProgram ('paddle.fluid.compiler.CompiledProgram', ('document', '598d294107d44d7620bce76527a92c37')) paddle.fluid.CompiledProgram.__init__ (ArgSpec(args=['self', 'program_or_graph', 'build_strategy'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) @@ -300,7 +300,7 @@ paddle.fluid.layers.kldiv_loss (ArgSpec(args=['x', 'target', 'reduction', 'name' paddle.fluid.layers.npair_loss (ArgSpec(args=['anchor', 'positive', 'labels', 'l2_reg'], varargs=None, keywords=None, defaults=(0.002,)), ('document', 'a41a93253c937697e900e19af172490d')) paddle.fluid.layers.pixel_shuffle (ArgSpec(args=['x', 'upscale_factor'], varargs=None, keywords=None, defaults=None), ('document', '7e5cac851fd9bad344230e1044b6a565')) paddle.fluid.layers.fsp_matrix (ArgSpec(args=['x', 'y'], varargs=None, keywords=None, defaults=None), ('document', 'd803767ef4fb885013a28c98634e0bc4')) -paddle.fluid.layers.continuous_value_model (ArgSpec(args=['input', 'cvm', 'use_cvm'], varargs=None, keywords=None, defaults=(True,)), ('document', 'c03490ffaa1b78258747157c313db4cd')) +paddle.fluid.layers.continuous_value_model (ArgSpec(args=['input', 'cvm', 'use_cvm'], varargs=None, keywords=None, defaults=(True,)), ('document', 
'b335b531931cc8b2d19c65980eadfc1e')) paddle.fluid.layers.where (ArgSpec(args=['condition'], varargs=None, keywords=None, defaults=None), ('document', 'b1e1487760295e1ff55307b880a99e18')) paddle.fluid.layers.sign (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', 'b56afe9ae3fc553c95d907fd7ef6c314')) paddle.fluid.layers.deformable_conv (ArgSpec(args=['input', 'offset', 'mask', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'deformable_groups', 'im2col_step', 'param_attr', 'bias_attr', 'modulated', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, None, None, True, None)), ('document', '9b9c9d1282f994ccd4538201e0b6856f')) diff --git a/python/paddle/fluid/data_feed_desc.py b/python/paddle/fluid/data_feed_desc.py index fa4e35a44c4..eca9543c60a 100644 --- a/python/paddle/fluid/data_feed_desc.py +++ b/python/paddle/fluid/data_feed_desc.py @@ -92,7 +92,7 @@ class DataFeedDesc(object): def set_batch_size(self, batch_size): """ - Set batch size. Will be effective during training + Set :attr:`batch_size` in :ref:`api_fluid_DataFeedDesc` . :attr:`batch_size` can be changed during training. Example: .. code-block:: python @@ -120,16 +120,20 @@ class DataFeedDesc(object): data_feed.set_batch_size(128) Args: - batch_size: batch size + batch_size (int): The number of batch size. + + Returns: + None. """ self.proto_desc.batch_size = batch_size def set_dense_slots(self, dense_slots_name): """ - Set if a specific slot will be dense. Will be effective during training. - features for a dense slot will be fed into a Tensor, while those for a - sparse slot will be fed into a LoDTensor + Set slots in :attr:`dense_slots_name` as dense slots. **Note: In default, all slots are sparse slots.** + + Features for a dense slot will be fed into a Tensor, while those for a + sparse slot will be fed into a LoDTensor. Example: .. 
code-block:: python @@ -157,10 +161,11 @@ class DataFeedDesc(object): data_feed.set_dense_slots(['words']) Args: - dense_slots_name: a list of slot names which will be set dense + dense_slots_name (list(str)): a list of slot names which will be set dense. + + Returns: + None. - Note: - Default is sparse for all slots """ if self.proto_desc.name != "MultiSlotDataFeed": raise ValueError( diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 0b287d60ece..757cec6ec5b 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -14878,32 +14878,34 @@ def continuous_value_model(input, cvm, use_cvm=True): **continuous_value_model layers** - continuous value model(cvm). Now, it only considers show and click value in CTR project. - We assume that input is an embedding vector with cvm_feature, whose shape is [N * D] (D is 2 + embedding dim). - If use_cvm is True, it will log(cvm_feature), and output shape is [N * D]. - If use_cvm is False, it will remove cvm_feature from input, and output shape is [N * (D - 2)]. - - This layer accepts a tensor named input which is ID after embedded(lod level is 1), cvm is a show_click info. + Now, this OP is used in CTR project to remove or dispose show and click value in :attr:`input`. - Args: + :attr:`input` is an embedding vector including show and click value, whose shape is :math:`[N, D]` (N is batch size. D is `2 + embedding dim` ). + Show and click at first two dims of embedding vector D. + If :attr:`use_cvm` is True, it will calculate :math:`log(show)` and :math:`log(click)` , and output shape is :math:`[N, D]` . + If :attr:`use_cvm` is False, it will remove show and click from :attr:`input` , and output shape is :math:`[N, D - 2]` . + :attr:`cvm` is show_click info, whose shape is :math:`[N, 2]` . - input (Variable): a 2-D LodTensor with shape [N x D], where N is the batch size, D is 2 + the embedding dim. lod level = 1.
- cvm (Variable): a 2-D Tensor with shape [N x 2], where N is the batch size, 2 is show and click. - use_cvm (bool): use cvm or not. if use cvm, the output dim is the same as input - if don't use cvm, the output dim is input dim - 2(remove show and click) - (cvm op is a customized op, which input is a sequence has embed_with_cvm default, so we need an op named cvm to decided whever use it or not.) + Args: + input (Variable): The input variable. A 2-D LoDTensor with shape :math:`[N, D]` , where N is the batch size, D is `2 + the embedding dim` . `lod level = 1` . + A Tensor with type float32, float64. + cvm (Variable): Show and click variable. A 2-D Tensor with shape :math:`[N, 2]` , where N is the batch size, 2 is show and click. + A Tensor with type float32, float64. + use_cvm (bool): Use show_click or not. if use, the output dim is the same as input. + if not use, the output dim is `input dim - 2` (remove show and click) Returns: - Variable: A 2-D LodTensor with shape [N x D], if use cvm, D is equal to input dim, if don't use cvm, D is equal to input dim - 2. + Variable: A 2-D LoDTensor with shape :math:`[N, M]` . if :attr:`use_cvm` = True, M is equal to input dim D. if False, M is equal to `D - 2`. \ + A Tensor with same type as input. Examples: .. code-block:: python import paddle.fluid as fluid - input = fluid.layers.data(name="input", shape=[-1, 1], lod_level=1, append_batch_size=False, dtype="int64")#, stop_gradient=False) - label = fluid.layers.data(name="label", shape=[-1, 1], append_batch_size=False, dtype="int64") + input = fluid.data(name="input", shape=[64, 1], dtype="int64") + label = fluid.data(name="label", shape=[64, 1], dtype="int64") embed = fluid.layers.embedding( input=input, size=[100, 11], -- GitLab