diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc
index c438d6edf29417f2beb2b12f19df6d52a1def3da..c4706a648abf3fc85ce357063e8f266c96d5fee9 100644
--- a/paddle/fluid/pybind/imperative.cc
+++ b/paddle/fluid/pybind/imperative.cc
@@ -155,7 +155,30 @@ void BindImperative(pybind11::module *m_ptr) {
   auto &m = *m_ptr;
 
   py::class_<imperative::detail::BackwardStrategy> backward_strategy(
-      m, "BackwardStrategy", R"DOC()DOC");
+      m, "BackwardStrategy", R"DOC(
+
+    BackwardStrategy is a descriptor of how to run the backward process. Currently it has:
+
+    1. :code:`sort_sum_gradient`, which sums gradients in the reverse order of the trace.
+
+    Examples:
+
+     .. code-block:: python
+
+        import numpy as np
+        import paddle.fluid as fluid
+
+        x = np.ones([2, 2], np.float32)
+        with fluid.dygraph.guard():
+            inputs2 = []
+            for _ in range(10):
+                inputs2.append(fluid.dygraph.base.to_variable(x))
+            ret2 = fluid.layers.sums(inputs2)
+            loss2 = fluid.layers.reduce_sum(ret2)
+            backward_strategy = fluid.dygraph.BackwardStrategy()
+            backward_strategy.sort_sum_gradient = True
+            loss2.backward(backward_strategy)
+  )DOC");
   backward_strategy.def(py::init())
       .def_property("sort_sum_gradient",
                     [](const imperative::detail::BackwardStrategy &self) {
diff --git a/python/paddle/fluid/dygraph/base.py b/python/paddle/fluid/dygraph/base.py
index 13b09f14556b576de95736a0fd641315742a66d0..598facce4b703116aa7f093364695fcd72fe1d7b 100644
--- a/python/paddle/fluid/dygraph/base.py
+++ b/python/paddle/fluid/dygraph/base.py
@@ -54,6 +54,33 @@ def _dygraph_not_support_(func):
 
 
 def _no_grad_(func):
+    """
+    This decorator prevents the decorated function from building a backward network in dygraph mode.
+
+    Args:
+        func: the function that does not need gradients.
+
+    Examples:
+
+     .. code-block:: python
+
+        import numpy as np
+        import paddle.fluid as fluid
+
+        @fluid.dygraph.no_grad
+        def test_layer():
+            with fluid.dygraph.guard():
+                inp = np.ones([3, 32, 32], dtype='float32')
+                t = fluid.dygraph.base.to_variable(inp)
+                fc1 = fluid.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1)
+                fc2 = fluid.FC('fc2', size=4)
+                ret = fc1(t)
+                dy_ret = fc2(ret)
+
+        test_layer()
+
+    """
+
     def __impl__(*args, **kwargs):
         with _switch_tracer_mode_guard_(is_train=False):
             return func(*args, **kwargs)
@@ -67,6 +94,31 @@ not_support = wrap_decorator(_dygraph_not_support_)
 
 
 @signature_safe_contextmanager
 def guard(place=None):
+    """
+    This context manager creates a dygraph context for dygraph code to run in.
+
+    Args:
+        place(fluid.CPUPlace|fluid.CUDAPlace|None): the place to run on.
+
+    Returns:
+        None
+
+    Examples:
+
+     .. code-block:: python
+
+        import numpy as np
+        import paddle.fluid as fluid
+
+        with fluid.dygraph.guard():
+            inp = np.ones([3, 32, 32], dtype='float32')
+            t = fluid.dygraph.base.to_variable(inp)
+            fc1 = fluid.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1)
+            fc2 = fluid.FC('fc2', size=4)
+            ret = fc1(t)
+            dy_ret = fc2(ret)
+
+    """
     train = framework.Program()
     startup = framework.Program()
     tracer = Tracer(train.current_block().desc)
@@ -85,6 +137,29 @@ def guard(place=None):
 
 
 def to_variable(value, block=None, name=None):
+    """
+    This function creates a Variable from a numpy ndarray.
+
+    Args:
+        value(ndarray): the numpy value to be converted.
+        block(fluid.Block|None): the block that this variable will belong to.
+        name(str|None): the name of the Variable.
+
+    Returns:
+        Variable: the Variable created from the given numpy array.
+
+    Examples:
+
+     .. code-block:: python
+
+        import numpy as np
+        import paddle.fluid as fluid
+
+        with fluid.dygraph.guard():
+            x = np.ones([2, 2], np.float32)
+            y = fluid.dygraph.to_variable(x)
+
+    """
     if isinstance(value, np.ndarray):
         assert enabled(), "to_variable could only be called in dygraph mode"
diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py
index 9bdfef4029e49d4967bcbcdf1bf77fa8b1479e8a..753cb26dc7659f41505a8e4944abdd8088eb5d4a 100644
--- a/python/paddle/fluid/dygraph/nn.py
+++ b/python/paddle/fluid/dygraph/nn.py
@@ -1170,22 +1170,15 @@ class Embedding(layers.Layer):
     This layer is used to lookup embeddings of IDs, provided by :attr:`input`, in
     a lookup table. The result of this lookup is the embedding of each ID in the
     :attr:`input`.
-
-    All the input variables are passed in as local variables to the LayerHelper
-    constructor.
+    All the input variables are passed in as local variables to the LayerHelper constructor.
 
     Args:
         name_scope: See base class.
-        size(tuple|list): The shape of the look up table parameter. It should
-            have two elements which indicate the size of the dictionary of
-            embeddings and the size of each embedding vector respectively.
+        size(tuple|list): The shape of the look up table parameter. It should have two elements which indicate the size of the dictionary of embeddings and the size of each embedding vector respectively.
+        is_sparse(bool): The flag indicating whether to use sparse update.
         is_distributed(bool): Whether to run lookup table from remote parameter server.
-        padding_idx(int|long|None): If :attr:`None`, it makes no effect to lookup.
-            Otherwise the given :attr:`padding_idx` indicates padding the output
-            with zeros whenever lookup encounters it in :attr:`input`. If
-            :math:`padding_idx < 0`, the :attr:`padding_idx` to use in lookup is
-            :math:`size[0] + dim`.
+        padding_idx(int|long|None): If :attr:`None`, it makes no effect to lookup. Otherwise the given :attr:`padding_idx` indicates padding the output with zeros whenever lookup encounters it in :attr:`input`. If :math:`padding_idx < 0`, the :attr:`padding_idx` to use in lookup is :math:`size[0] + dim`.
         param_attr(ParamAttr): Parameters for this layer
         dtype(np.dtype|core.VarDesc.VarType|str): The type of data : float32, float_16, int etc
@@ -1197,10 +1190,15 @@
 
         .. code-block:: python
 
-          dict_size = len(dataset.ids)
-          input = fluid.layers.data(name='ids', shape=[32, 32], dtype='float32')
-          embedding = fluid.Embedding(size=[dict_size, 16])
-          fc = embedding(input)
+          inp_word = np.array([[[1]]]).astype('int64')
+          dict_size = 20
+          with fluid.dygraph.guard():
+              emb = fluid.Embedding(
+                  name_scope='embedding',
+                  size=[dict_size, 32],
+                  param_attr='emb.w',
+                  is_sparse=False)
+              emb_rlt = emb(fluid.dygraph.base.to_variable(inp_word))
 
     """
 
     def __init__(self,
@@ -1509,12 +1507,15 @@ class GRUUnit(layers.Layer):
 
 class NCE(layers.Layer):
     """
-    ${comment}
+    Compute and return the noise-contrastive estimation training loss. See
+    `Noise-contrastive estimation: A new estimation principle for unnormalized
+    statistical models
+    <http://www.jmlr.org/proceedings/papers/v9/gutmann10a/gutmann10a.pdf>`_.
+    By default this operator uses a uniform distribution for sampling.
 
     Args:
-        input (Variable): input variable.
-        label (Variable): label.
-        num_total_classes (int):${num_total_classes_comment}
+        name_scope (str): See base class.
+        num_total_classes (int): Total number of classes in all samples.
         sample_weight (Variable|None): A Variable of shape [batch_size, 1]
             storing a weight for each sample. The default weight for each sample is 1.0.
@@ -1527,7 +1528,7 @@ class NCE(layers.Layer):
             If it is set to None or one attribute of ParamAttr, nce
             will create ParamAttr as bias_attr. If the Initializer of the bias_attr
             is not set, the bias is initialized zero. Default: None.
-        num_neg_samples (int): ${num_neg_samples_comment}
+        num_neg_samples (int): The number of negative classes. The default value is 10.
         name (str|None): A name for this layer(optional). If set None, the layer
             will be named automatically. Default: None.
         sampler (str): The sampler used to sample class from negtive classes.
@@ -1546,37 +1547,45 @@
     Examples:
 
         .. code-block:: python
 
+            import numpy as np
+            import paddle.fluid as fluid
+
             window_size = 5
-            words = []
-            for i in xrange(window_size):
-                words.append(layers.data(
-                    name='word_{0}'.format(i), shape=[1], dtype='int64'))
-
-            dict_size = 10000
-            label_word = int(window_size / 2) + 1
-
-            embs = []
-            for i in xrange(window_size):
-                if i == label_word:
-                    continue
-
-                emb = layers.embedding(input=words[i], size=[dict_size, 32],
-                                       param_attr='emb.w', is_sparse=True)
-                embs.append(emb)
-
-            embs = layers.concat(input=embs, axis=1)
-            loss = layers.nce(input=embs, label=words[label_word],
-                              num_total_classes=dict_size, param_attr='nce.w',
-                              bias_attr='nce.b')
-
-            #or use custom distribution
-            dist = fluid.layers.assign(input=np.array([0.05,0.5,0.1,0.3,0.05]).astype("float32"))
-            loss = layers.nce(input=embs, label=words[label_word],
-                              num_total_classes=5, param_attr='nce.w',
-                              bias_attr='nce.b',
-                              num_neg_samples=3,
-                              sampler="custom_dist",
-                              custom_dist=dist)
+            dict_size = 20
+            label_word = int(window_size // 2) + 1
+            inp_word = np.array([[[1]], [[2]], [[3]], [[4]], [[5]]]).astype('int64')
+            nid_freq_arr = np.random.dirichlet(np.ones(20) * 1000).astype('float32')
+
+            with fluid.dygraph.guard():
+                words = []
+                for i in range(window_size):
+                    words.append(fluid.dygraph.base.to_variable(inp_word[i]))
+
+                emb = fluid.Embedding(
+                    'embedding',
+                    size=[dict_size, 32],
+                    param_attr='emb.w',
+                    is_sparse=False)
+
+                embs3 = []
+                for i in range(window_size):
+                    if i == label_word:
+                        continue
+
+                    emb_rlt = emb(words[i])
+                    embs3.append(emb_rlt)
+
+                embs3 = fluid.layers.concat(input=embs3, axis=1)
+                nce = fluid.NCE('nce',
+                                num_total_classes=dict_size,
+                                num_neg_samples=2,
+                                sampler="custom_dist",
+                                custom_dist=nid_freq_arr.tolist(),
+                                seed=1,
+                                param_attr='nce.w',
+                                bias_attr='nce.b')
+
+                nce_loss3 = nce(embs3, words[label_word])
 
     """
@@ -1739,13 +1748,13 @@ class PRelu(layers.Layer):
         y = \max(0, x) + \\alpha * \min(0, x)
 
     Args:
-        x (Variable): The input tensor.
-        param_attr(ParamAttr|None): The parameter attribute for the learnable
-            weight (alpha).
+        name_scope (str): See base class.
         mode (string): The mode for weight sharing. It supports all, channel
             and element. all: all elements share same weight
             channel:elements in a channel share same weight
             element:each element has a weight
+        param_attr(ParamAttr|None): The parameter attribute for the learnable
+            weight (alpha).
         name(str|None): A name for this layer(optional). If set None, the layer
             will be named automatically.
@@ -1756,9 +1765,14 @@
         .. code-block:: python
 
-            x = fluid.layers.data(name="x", shape=[10,10], dtype="float32")
-            mode = 'channel'
-            output = fluid.layers.prelu(x,mode)
+            inp_np = np.ones([5, 200, 100, 100]).astype('float32')
+            mode = 'channel'
+            with fluid.dygraph.guard():
+                prelu = fluid.PRelu(
+                    'prelu',
+                    mode=mode,
+                    param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant(1.0)))
+                dy_rlt = prelu(fluid.dygraph.base.to_variable(inp_np))
 
     """
 
     def __init__(self, name_scope, mode, param_attr=None):
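As a quick cross-check of the dygraph pieces documented above, the docstring examples compose into one small end-to-end run. This is a minimal sketch assembled from those examples, assuming the 1.x `fluid.dygraph` namespace and the `fluid.layers.sums`/`fluid.layers.reduce_sum` ops used in the BackwardStrategy docstring:

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    x = np.ones([2, 2], np.float32)
    with fluid.dygraph.guard():                      # dygraph context (guard)
        inputs = [fluid.dygraph.to_variable(x)       # ndarray -> Variable (to_variable)
                  for _ in range(10)]
        loss = fluid.layers.reduce_sum(fluid.layers.sums(inputs))

        strategy = fluid.dygraph.BackwardStrategy()  # sum gradients in reverse trace order
        strategy.sort_sum_gradient = True
        loss.backward(strategy)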