Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into feature/refine_gather_reduce

2aaa75ec · chengduoZH · 690cd1f7 · 0032b4a4 · 2aaa75ec · 2aaa75ec
11 changed files
--- a/Dockerfile
+++ b/Dockerfile
@@ -57,7 +57,7 @@ RUN localedef -i en_US -f UTF-8 en_US.UTF-8
 # specify sphinx version as 1.5.6 and remove -U option for [pip install -U
 # sphinx-rtd-theme] since -U option will cause sphinx being updated to newest
 # version(1.7.1 for now), which causes building documentation failed.
-RUN pip install --upgrade pip && \
+RUN pip install --upgrade pip==9.0.3 && \
    pip install -U wheel && \
    pip install -U docopt PyYAML sphinx==1.5.6 && \
    pip install sphinx-rtd-theme==0.1.9 recommonmark

--- a/cmake/external/grpc.cmake
+++ b/cmake/external/grpc.cmake
@@ -33,7 +33,7 @@ ExternalProject_Add(
    extern_grpc
    DEPENDS protobuf zlib
    GIT_REPOSITORY "https://github.com/grpc/grpc.git"
-    GIT_TAG "v1.11.x"
+    GIT_TAG "v1.10.x"
    PREFIX          ${GRPC_SOURCES_DIR}
    UPDATE_COMMAND  ""
    CONFIGURE_COMMAND ""

--- a/doc/fluid/api/layers.rst
+++ b/doc/fluid/api/layers.rst
@@ -473,6 +473,12 @@ multiplex
 ..  autofunction:: paddle.fluid.layers.multiplex
    :noindex:
+label_smooth
+------------
+..  autofunction:: paddle.fluid.layers.label_smooth
+    :noindex:
 ops
 ===

--- a/doc/fluid/dev/index_cn.rst
+++ b/doc/fluid/dev/index_cn.rst
@@ -4,6 +4,7 @@
 .. toctree::
  :maxdepth: 1
+  api_doc_std_cn.md
  new_op_cn.md
  new_op_kernel.md
  use_eigen_cn.md

--- a/doc/fluid/dev/index_en.rst
+++ b/doc/fluid/dev/index_en.rst
@@ -4,6 +4,7 @@ Development
 .. toctree::
  :maxdepth: 1
+  api_doc_std_en.md
  new_op_en.md
  new_op_kernel.md
  use_eigen_en.md

--- a/paddle/fluid/framework/details/multi_devices_graph_builder.cc
+++ b/paddle/fluid/framework/details/multi_devices_graph_builder.cc
@@ -55,21 +55,21 @@ MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder(
  }
 }
-void MultiDevSSAGraphBuilder::CreateOpHandleIOs(SSAGraph *result, OpDesc *op,
+void MultiDevSSAGraphBuilder::CreateOpHandleIOs(SSAGraph *result,
+                                                const OpDesc &op,
                                                const platform::Place &p,
                                                const size_t &i) const {
  auto *op_handle = result->ops_.back().get();
-  op_handle->dev_ctxes_[p] = const_cast<platform::DeviceContext *>(
+  op_handle->dev_ctxes_[p] = platform::DeviceContextPool::Instance().Get(p);
-      platform::DeviceContextPool::Instance().Get(p));
-  auto var_names = op->InputArgumentNames();
+  auto var_names = op.InputArgumentNames();
  for (auto &each_var_name : var_names) {
    VarHandle *var = CreateOrGetLatestVarHandle(result, each_var_name, p, i);
    op_handle->AddInput(var);
  }
-  var_names = op->OutputArgumentNames();
+  var_names = op.OutputArgumentNames();
  for (auto &each_var_name : var_names) {
    CreateOpOutput(result, op_handle, each_var_name, p, i);
@@ -107,7 +107,7 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
      result.ops_.emplace_back(new SendOpHandle(*op, s, p));
      // Create inputs for output on original place and no ssa output
      // is created for send op.
-      CreateOpHandleIOs(&result, op, p, 0);
+      CreateOpHandleIOs(&result, *op, p, 0);
      continue;
    }
@@ -117,7 +117,7 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
      result.ops_.emplace_back(new ComputationOpHandle(*op, s, p));
      auto *op_handle = result.ops_.back().get();
-      CreateOpHandleIOs(&result, op, p, i);
+      CreateOpHandleIOs(&result, *op, p, i);
      auto var_names = op->OutputArgumentNames();

--- a/paddle/fluid/framework/details/multi_devices_graph_builder.h
+++ b/paddle/fluid/framework/details/multi_devices_graph_builder.h
@@ -45,8 +45,8 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
  std::unique_ptr<SSAGraph> Build(const ProgramDesc &program) const override;
 private:
-  void CreateOpHandleIOs(SSAGraph *result, OpDesc *op, const platform::Place &p,
+  void CreateOpHandleIOs(SSAGraph *result, const OpDesc &op,
-                         const size_t &i) const;
+                         const platform::Place &p, const size_t &i) const;
 private:
  std::string loss_var_name_;

--- a/paddle/fluid/framework/program_desc_test.cc
+++ b/paddle/fluid/framework/program_desc_test.cc
@@ -66,7 +66,7 @@ TEST(ProgramDesc, copy_ctor) {
  for (size_t i = 0; i < global_block->OpSize(); ++i) {
    auto op_origin = global_block->Op(i);
-    auto op_copy = global_block->Op(i);
+    auto op_copy = global_block_copy->Op(i);
    ASSERT_EQ(op_origin->Type(), op_copy->Type());
    ASSERT_EQ(op_origin->Inputs(), op_copy->Inputs());
@@ -131,7 +131,7 @@ TEST(ProgramDescBind, serialize_and_deserialize) {
  for (size_t i = 0; i < global_block->OpSize(); ++i) {
    auto op_origin = global_block->Op(i);
-    auto op_restored = global_block->Op(i);
+    auto op_restored = global_block_restored->Op(i);
    ASSERT_EQ(op_origin->Type(), op_restored->Type());
    ASSERT_EQ(op_origin->Inputs(), op_restored->Inputs());

--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -77,6 +77,7 @@ __all__ = [
    'lod_reset',
    'lrn',
    'pad',
+    'label_smooth',
 ]
@@ -3678,3 +3679,68 @@ def pad(x, paddings, pad_value=0., name=None):
        attrs={'paddings': paddings,
               'pad_value': float(pad_value)})
    return out
+def label_smooth(label,
+                 prior_dist=None,
+                 epsilon=0.1,
+                 dtype="float32",
+                 name=None):
+    """
+    Label smoothing is a mechanism to regularize the classifier layer and is
+    called label-smoothing regularization (LSR).
+    Label smoothing is proposed to encourage the model to be less confident,
+    since optimizing the log-likelihood of the correct label directly may
+    cause overfitting and reduce the ability of the model to adapt. Label
+    smoothing replaces the ground-truth label :math:`y` with the weighted sum
+    of itself and some fixed distribution :math:`\\mu`. For class :math:`k`,
+    the smoothed label is
+    .. math::
+        \\tilde{y}_k = (1 - \\epsilon) * y_k + \\epsilon * \\mu_k,
+    where :math:`1 - \\epsilon` and :math:`\\epsilon` are the weights
+    respectively, and :math:`\\tilde{y}_k` is the smoothed label. Usually
+    uniform distribution is used for :math:`\\mu`.
+    See more details about label smoothing in https://arxiv.org/abs/1512.00567.
+    Args:
+        label(Variable): The input variable containing the label data. The
+                          label data should use one-hot representation.
+                          Its ``stop_gradient`` attribute is set to True by
+                          this layer.
+        prior_dist(Variable|None): The prior distribution to be used to smooth
+                              labels. If not provided, a uniform distribution
+                              is used. The shape of :attr:`prior_dist` should
+                              be :math:`(1, class\\_num)`.
+        epsilon(float): The weight used to mix up the original ground-truth
+                        distribution and the fixed distribution. Must lie in
+                        the closed interval [0, 1].
+        dtype(np.dtype|core.VarDesc.VarType|str): The data type of the output
+                                                  variable: float32, float64,
+                                                  int etc.
+        name(str|None): A name for this layer(optional). If set None, the layer
+                        will be named automatically.
+    Returns:
+        Variable: The tensor variable containing the smoothed labels.
+    Raises:
+        ValueError: If :attr:`epsilon` is greater than 1 or less than 0.
+    Examples:
+        .. code-block:: python
+            label = layers.data(name="label", shape=[1], dtype="float32")
+            one_hot_label = layers.one_hot(input=label, depth=10)
+            smooth_label = layers.label_smooth(
+                label=one_hot_label, epsilon=0.1, dtype="float32")
+    """
+    if epsilon > 1. or epsilon < 0.:
+        raise ValueError("The value of epsilon must be between 0 and 1.")
+    # locals() must be captured before any extra local is introduced so the
+    # helper only sees the layer's own arguments.
+    helper = LayerHelper("label_smooth", **locals())
+    # Note: this mutates the caller's variable.
+    label.stop_gradient = True
+    smooth_label = helper.create_tmp_variable(dtype)
+    # Only wire PriorDist when the caller actually supplied one. Compare
+    # against None explicitly instead of relying on the truthiness of a
+    # Variable object.
+    op_inputs = {"X": label}
+    if prior_dist is not None:
+        op_inputs["PriorDist"] = prior_dist
+    helper.append_op(
+        type="label_smooth",
+        inputs=op_inputs,
+        outputs={"Out": smooth_label},
+        attrs={"epsilon": float(epsilon)})
+    return smooth_label
--- a/python/paddle/fluid/metrics.py
+++ b/python/paddle/fluid/metrics.py
@@ -169,7 +169,7 @@ class Accuracy(MetricBase):
        return self.value / self.weight
-class ChunkEvalutor(MetricBase):
+class ChunkEvaluator(MetricBase):
    """
    Accumulate counter numbers output by chunk_eval from mini-batches and
    compute the precision recall and F1-score using the accumulated counter
@@ -177,7 +177,7 @@ class ChunkEvalutor(MetricBase):
    """
    def __init__(self, name=None):
-        super(ChunkEvalutor, self).__init__(name)
+        super(ChunkEvaluator, self).__init__(name)
        self.num_infer_chunks = 0
        self.num_label_chunks = 0
        self.num_correct_chunks = 0

--- a/python/paddle/fluid/tests/unittests/test_layers.py
+++ b/python/paddle/fluid/tests/unittests/test_layers.py
@@ -340,6 +340,16 @@ class TestBook(unittest.TestCase):
            print(layers.lod_reset(x=x, y=y))
        print(str(program))
+    def test_label_smooth(self):
+        """Build a program that smooths a one-hot label and check that the
+        layer returns an output variable; the program is printed afterwards."""
+        prog = Program()
+        with program_guard(prog):
+            raw_label = layers.data(name="label", shape=[1], dtype="float32")
+            smoothed = layers.label_smooth(
+                label=layers.one_hot(input=raw_label, depth=10),
+                epsilon=0.1,
+                dtype="float32")
+            self.assertIsNotNone(smoothed)
+        print(str(prog))
 if __name__ == '__main__':
    unittest.main()