logsumexp: impl kernel, refine docs (#26307)

9b14117c · zhupengyang · GitHub · 5c2b9258 · 9b14117c · 9b14117c
5 changed files
--- a/paddle/fluid/operators/reduce_ops/logsumexp_op.cc
+++ b/paddle/fluid/operators/reduce_ops/logsumexp_op.cc
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/fluid/operators/reduce_ops/logsumexp_op.h"
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+namespace paddle {
+namespace operators {
+// Proto maker for the logsumexp operator. It only supplies the two strings
+// returned by GetName()/GetOpType(); everything else is inherited from
+// ops::ReduceOpMaker.
+// NOTE(review): assumes ReduceOpMaker declares both hooks as virtual const
+// members returning std::string -- confirm against reduce_op.h.
+class LogsumexpOpMaker : public ops::ReduceOpMaker {
+ protected:
+  // `override` (instead of a bare `virtual`) makes the compiler reject any
+  // signature drift from the base-class hooks rather than silently
+  // introducing unrelated virtual functions.
+  std::string GetName() const override { return "logsumexp"; }
+  std::string GetOpType() const override { return "Reduce logsumexp"; }
+};
+// Describes the `logsumexp_grad` op that corresponds to a forward
+// `logsumexp` op. T is the op representation the maker is instantiated for.
+template <typename T>
+class LogsumexpGradOpMaker : public framework::SingleGradOpMaker<T> {
+ public:
+  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
+ protected:
+  // Inputs of the grad op: X, Out and the gradient of Out.
+  // Output of the grad op: the gradient of X.
+  // The attribute map of the forward op is copied over as-is.
+  void Apply(GradOpPtr<T> op) const override {
+    const auto out_grad_name = framework::GradVarName("Out");
+    const auto x_grad_name = framework::GradVarName("X");
+    op->SetType("logsumexp_grad");
+    op->SetInput("X", this->Input("X"));
+    op->SetInput("Out", this->Output("Out"));
+    op->SetInput(out_grad_name, this->OutputGrad("Out"));
+    op->SetAttrMap(this->Attrs());
+    op->SetOutput(x_grad_name, this->InputGrad("X"));
+  }
+};
+}  // namespace operators
+}  // namespace paddle
+// Forward operator: ops::ReduceOp with the logsumexp proto maker and the
+// grad-op maker instantiated for paddle::framework::OpDesc and
+// paddle::imperative::OpBase.
+REGISTER_OPERATOR(
+    logsumexp, ops::ReduceOp, ops::LogsumexpOpMaker,
+    ops::LogsumexpGradOpMaker<paddle::framework::OpDesc>,
+    ops::LogsumexpGradOpMaker<paddle::imperative::OpBase>);
+// Backward operator.
+REGISTER_OPERATOR(logsumexp_grad, ops::ReduceGradOp);
+// CPU forward kernels for float and double.
+REGISTER_OP_CPU_KERNEL(
+    logsumexp,
+    ops::ReduceKernel<paddle::platform::CPUDeviceContext, float,
+                      ops::LogsumexpFunctor>,
+    ops::ReduceKernel<paddle::platform::CPUDeviceContext, double,
+                      ops::LogsumexpFunctor>);
+// CPU backward kernels for float and double.
+REGISTER_OP_CPU_KERNEL(
+    logsumexp_grad,
+    ops::ReduceGradKernel<paddle::platform::CPUDeviceContext, float,
+                          ops::LogsumexpGradFunctor>,
+    ops::ReduceGradKernel<paddle::platform::CPUDeviceContext, double,
+                          ops::LogsumexpGradFunctor>);
--- a/paddle/fluid/operators/reduce_ops/logsumexp_op.cu
+++ b/paddle/fluid/operators/reduce_ops/logsumexp_op.cu
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/fluid/operators/reduce_ops/cub_reduce.h"
+#include "paddle/fluid/operators/reduce_ops/logsumexp_op.h"
+// CUDA forward kernels for float and double; they reuse the same
+// LogsumexpFunctor as the CPU registration.
+REGISTER_OP_CUDA_KERNEL(
+    logsumexp,
+    ops::ReduceKernel<paddle::platform::CUDADeviceContext, float,
+                      ops::LogsumexpFunctor>,
+    ops::ReduceKernel<paddle::platform::CUDADeviceContext, double,
+                      ops::LogsumexpFunctor>);
+// CUDA backward kernels for float and double.
+REGISTER_OP_CUDA_KERNEL(
+    logsumexp_grad,
+    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, float,
+                          ops::LogsumexpGradFunctor>,
+    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, double,
+                          ops::LogsumexpGradFunctor>);
--- a/paddle/fluid/operators/reduce_ops/logsumexp_op.h
+++ b/paddle/fluid/operators/reduce_ops/logsumexp_op.h
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include "paddle/fluid/operators/reduce_ops/reduce_op.h"
+namespace paddle {
+namespace operators {
+// Forward functor: writes log(sum(exp(x))) over the axes listed in `dim`
+// into `y`. The value is evaluated as max + log(sum(exp(x - max))), so exp()
+// is only ever applied to values that are <= 0.
+struct LogsumexpFunctor {
+  template <typename DeviceContext, typename X, typename Y, typename Dim>
+  void operator()(const DeviceContext& place, X* x, Y* y, const Dim& dim) {
+    const auto in_shape = x->dimensions();
+    // keep_shape: input shape with every reduced axis collapsed to 1.
+    // bcast: per-axis repeat counts that expand keep_shape back to in_shape
+    // (1 everywhere except on the reduced axes).
+    auto keep_shape = in_shape;
+    auto bcast = in_shape;
+    const int rank = static_cast<int>(bcast.size());
+    for (int k = 0; k < rank; ++k) {
+      bcast[k] = 1;
+    }
+    const int num_reduced = static_cast<int>(dim.size());
+    for (int k = 0; k < num_reduced; ++k) {
+      const auto axis = dim[k];
+      keep_shape[axis] = 1;
+      bcast[axis] = in_shape[axis];
+    }
+    const auto out_shape = y->dimensions();
+    // Per-slice maximum over the reduced axes; it is subtracted before exp()
+    // and added back after log().
+    auto peak = x->maximum(dim);
+    y->device(place) =
+        (peak +
+         (*x - peak.reshape(keep_shape).broadcast(bcast))
+             .exp()
+             .sum(dim)
+             .log())
+            .reshape(out_shape);
+  }
+};
+// Backward functor: dx = dy * exp(x - y), where `dy` and `y` are first
+// broadcast with the factors given in `dim`.
+struct LogsumexpGradFunctor {
+  template <typename DeviceContext, typename X, typename Y, typename DX,
+            typename DY, typename Dim>
+  void operator()(const DeviceContext& place, X* x, Y* y, DX* dx, DY* dy,
+                  const Dim& dim, int size) {
+    // `size` is part of the functor signature but is not used here.
+    // exp(x - logsumexp(x)): the per-element factor multiplied onto dy.
+    auto weight = (*x - y->broadcast(dim)).exp();
+    dx->device(place) = dy->broadcast(dim) * weight;
+  }
+};
+}  // namespace operators
+}  // namespace paddle
--- a/python/paddle/fluid/tests/unittests/test_logsumexp.py
+++ b/python/paddle/fluid/tests/unittests/test_logsumexp.py
@@ -12,64 +12,128 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import print_function
 import paddle
-import paddle.fluid as fluid
 import unittest
 import numpy as np
 from op_test import OpTest
-from paddle.fluid import Program, program_guard
-from paddle.fluid.layer_helper import LayerHelper
-class TestLogSumOpError(unittest.TestCase):
+def ref_logsumexp(x, axis=None, keepdim=False, reduce_all=False):
+    if isinstance(axis, int):
+        axis = (axis, )
+    elif isinstance(axis, list):
+        axis = tuple(axis)
+    if reduce_all:
+        axis = None
+    out = np.log(np.exp(x).sum(axis=axis, keepdims=keepdim))
+    return out
+class TestLogsumexp(OpTest):
+    def setUp(self):
+        self.op_type = 'logsumexp'
+        self.shape = [2, 3, 4, 5]
+        self.dtype = 'float64'
+        self.axis = [-1]
+        self.keepdim = False
+        self.reduce_all = False
+        self.set_attrs()
+        np.random.seed(10)
+        x = np.random.uniform(-1, 1, self.shape).astype(self.dtype)
+        out = ref_logsumexp(x, self.axis, self.keepdim, self.reduce_all)
+        self.inputs = {'X': x}
+        self.outputs = {'Out': out}
+        self.attrs = {
+            'dim': self.axis,
+            'keep_dim': self.keepdim,
+            'reduce_all': self.reduce_all
+        }
+    def set_attrs(self):
+        pass
+    def test_check_output(self):
+        self.check_output()
+    def test_check_grad(self):
+        self.check_grad(['X'], ['Out'])
+class TestLogsumexp_shape(TestLogsumexp):
+    """Same checks as TestLogsumexp, with an input of shape [4, 5, 6]."""
+    def set_attrs(self):
+        self.shape = [4, 5, 6]
+class TestLogsumexp_axis(TestLogsumexp):
+    """Same checks as TestLogsumexp, reducing over axes 0 and -1 together."""
+    def set_attrs(self):
+        self.axis = [0, -1]
+class TestLogsumexp_axis_all(TestLogsumexp):
+    """Lists every axis of the default 4-D input while reduce_all stays False."""
+    def set_attrs(self):
+        self.axis = [0, 1, 2, 3]
+class TestLogsumexp_keepdim(TestLogsumexp):
+    """Same checks as TestLogsumexp, with keepdim set to True."""
+    def set_attrs(self):
+        self.keepdim = True
+class TestLogsumexp_reduce_all(TestLogsumexp):
+    """Same checks as TestLogsumexp, with the reduce_all attribute set to True."""
+    def set_attrs(self):
+        self.reduce_all = True
+class TestLogsumexpError(unittest.TestCase):
    def test_errors(self):
-        with program_guard(Program(), Program()):
+        with paddle.static.program_guard(paddle.static.Program()):
+            self.assertRaises(TypeError, paddle.logsumexp, 1)
-            x1 = fluid.layers.data(name='x1', shape=[120], dtype="uint8")
+            x1 = paddle.data(name='x1', shape=[120], dtype="int32")
-            self.assertRaises(Exception, paddle.logsumexp, x1)
+            self.assertRaises(TypeError, paddle.logsumexp, x1)
-            x2 = fluid.layers.data(name='x2', shape=[2, 3], dtype="int")
-            self.assertRaises(Exception, paddle.logsumexp, x2)
+class TestLogsumexpAPI(unittest.TestCase):
+    def setUp(self):
-            x3 = fluid.layers.data(name='x3', shape=[3], dtype="float16")
+        self.shape = [2, 3, 4, 5]
-            self.assertRaises(Exception, paddle.logsumexp, x3)
+        self.x = np.random.uniform(-1, 1, self.shape).astype(np.float32)
+        self.place = paddle.CUDAPlace(0) if paddle.fluid.core.is_compiled_with_cuda() \
-            self.assertRaises(AssertionError, paddle.logsumexp, None)
+            else paddle.CPUPlace()
+    def api_case(self, axis=None, keepdim=False):
-class TestLogSumExpOp(unittest.TestCase):
+        out_ref = ref_logsumexp(self.x, axis, keepdim)
-    def test_dygraph(self):
+        with paddle.static.program_guard(paddle.static.Program()):
-        with fluid.dygraph.guard():
+            x = paddle.data('X', self.shape)
-            np_x = np.random.uniform(0.1, 1, [123]).astype(np.float32)
+            out = paddle.logsumexp(x, axis, keepdim)
-            x = fluid.dygraph.to_variable(np_x)
+            exe = paddle.static.Executor(self.place)
-            self.assertTrue(
+            res = exe.run(feed={'X': self.x}, fetch_list=[out])
-                np.allclose(
+        self.assertTrue(np.allclose(res[0], out_ref))
-                    paddle.logsumexp(x).numpy(), np.log(np.sum(np.exp(np_x)))))
+        paddle.disable_static(self.place)
-            np_x = np.random.uniform(0.1, 1, [2, 3, 4]).astype(np.float32)
+        x = paddle.to_variable(self.x)
-            x = fluid.dygraph.to_variable(np_x)
+        out = paddle.logsumexp(x, axis, keepdim)
-            self.assertTrue(
+        self.assertTrue(np.allclose(out.numpy(), out_ref))
-                np.allclose(
+        paddle.enable_static()
-                    paddle.logsumexp(
-                        x, dim=[1, 2]).numpy(),
+    def test_api(self):
-                    np.log(np.sum(np.exp(np_x), axis=(1, 2)))))
+        self.api_case()
+        self.api_case(2)
-            np_x = np.random.uniform(0.1, 1, [2, 3, 4]).astype(np.float32)
+        self.api_case([-1])
-            x = fluid.dygraph.to_variable(np_x)
+        self.api_case([2, -3])
-            self.assertTrue(
+        self.api_case((0, 1, -1))
-                np.allclose(
+        self.api_case(keepdim=True)
-                    paddle.logsumexp(
-                        x, dim=[2]).numpy(),
+    def test_alias(self):
-                    np.log(np.sum(np.exp(np_x), axis=(2)))))
+        paddle.disable_static(self.place)
+        x = paddle.to_variable(self.x)
-            np_x = np.random.uniform(0.1, 1, [2, 3, 4]).astype(np.float32)
+        out1 = paddle.logsumexp(x)
-            x = fluid.dygraph.to_variable(np_x)
+        out2 = paddle.tensor.logsumexp(x)
-            self.assertTrue(
+        out3 = paddle.tensor.math.logsumexp(x)
-                np.allclose(
+        out_ref = ref_logsumexp(self.x)
-                    paddle.logsumexp(
+        for out in [out1, out2, out3]:
-                        x, keepdim=True).numpy(),
+            self.assertTrue(np.allclose(out.numpy(), out_ref))
-                    np.log(np.sum(np.exp(np_x), keepdims=True))))
+        paddle.enable_static()
 if __name__ == '__main__':

--- a/python/paddle/tensor/math.py
+++ b/python/paddle/tensor/math.py
@@ -82,6 +82,7 @@ __all__ = [
        'floor',
        'increment',
        'log',
+        'logsumexp',
        'mul',
        'multiplex',
        'prod',
@@ -964,69 +965,73 @@ def addmm(input, x, y, beta=1.0, alpha=1.0, name=None):
    return out
-def logsumexp(x, dim=None, keepdim=False, name=None):
+def logsumexp(x, axis=None, keepdim=False, name=None):
    """
-	:alias_main: paddle.logsumexp
+    This OP calculates the log of the sum of exponentials of ``x`` along ``axis`` .
-	:alias: paddle.logsumexp,paddle.tensor.logsumexp,paddle.tensor.math.logsumexp
-    This operator calculates the log of the sum of exponentials of the input Tensor.
    .. math::
       logsumexp(x) = \log\sum exp(x)
+    Args:
-    Parameters:
+        x (Tensor): The input Tensor with data type float32, float64.
-       x (Variable): Input LoDTensor or Tensor. Must be one of the following types: float32, float64.
+        axis (int|list|tuple, optional): The axis along which to perform
-       dim (list|int, optional): The dimensions along which the sum is performed. If :attr:`None`,
+            logsumexp calculations. ``axis`` should be int, list(int) or
-         sum all elements of :attr:`input` and return a Tensor variable with a single element,
+            tuple(int). If ``axis`` is a list/tuple of dimension(s), logsumexp
-         otherwise must be in the range :math:`[-rank(input), rank(input))`. If :math:`dim[i] < 0`,
+            is calculated along all element(s) of ``axis`` . ``axis`` or
-         the dimension to reduce is :math:`rank + dim[i]`.
+            element(s) of ``axis`` should be in range [-D, D), where D is the
-       keep_dim (bool, optional): Whether to reserve the reduced dimension in the output Tensor.
+            dimensions of ``x`` . If ``axis`` or element(s) of ``axis`` is
-         The result tensor will have one fewer dimension than the :attr:`input` unless :attr:`keep_dim`
+            less than 0, it works the same way as :math:`axis + D` . If
-         is true, default value is False.
+            ``axis`` is None, logsumexp is calculated along all elements of
-       name (str, optional): The default value is None.  Normally there is no need for user to
+            ``x``. Default is None.
-         set this property.  For more information, please refer to :ref:`api_guide_Name`
+        keepdim (bool, optional): Whether to reserve the reduced dimension(s)
+            in the output Tensor. If ``keepdim`` is True, the dimensions of
+            the output Tensor are the same as ``x`` except in the reduced
+            dimensions (they are of size 1 in this case). Otherwise, the shape
+            of the output Tensor is squeezed in ``axis`` . Default is False.
+        name (str, optional): Name for the operation (optional, default is None).
+            For more information, please refer to :ref:`api_guide_Name`.
    Returns:
-       Variable: The calcuated result Tensor/LoDTensor.
+        Tensor, results of logsumexp along ``axis`` of ``x``, with the same data
+        type as ``x``.
    Examples:
    .. code-block:: python
        import paddle
-        import paddle.fluid as fluid
        import numpy as np
-        with fluid.dygraph.guard():
+        paddle.disable_static()
-          np_x = np.random.uniform(0.1, 1, [10]).astype(np.float32)
-          x = fluid.dygraph.to_variable(np_x)
-          print(paddle.logsumexp(x).numpy())
-    ..  code-block:: python
-        import paddle
-        import paddle.fluid as fluid
-        import numpy as np
-        with fluid.dygraph.guard():
+        x = np.array([[-1.5, 0., 2.], [3., 1.2, -2.4]])
-            np_x = np.random.uniform(0.1, 1, [2, 3, 4]).astype(np.float32)
+        x = paddle.to_tensor(x)
-            x = fluid.dygraph.to_variable(np_x)
+        out1 = paddle.logsumexp(x) # [3.4691226]
-            print(paddle.logsumexp(x, dim=1).numpy())
+        out2 = paddle.logsumexp(x, 1) # [2.15317821, 3.15684602]
-            print(paddle.logsumexp(x, dim=[0, 2]).numpy())
    """
-    op_type = 'logsumexp'
+    if isinstance(axis, int):
-    assert x is not None, 'x cannot be None in {}'.format(op_type)
+        axis = [axis]
+    reduce_all = True if axis is None \
-    # reduce_sum does not support float16
+        or len(axis)==0 \
-    check_variable_and_dtype(x, 'x', ['float32', 'float64'], op_type)
+        or len(axis) == len(x.shape) else False
+    if axis is None or len(axis) == 0:
+        axis = [0]
-    exp_out = layers.exp(x)
+    if in_dygraph_mode():
-    sum_out = layers.reduce_sum(exp_out, dim, keepdim)
+        return core.ops.logsumexp(x, 'dim', axis, 'keep_dim', keepdim,
+                                    'reduce_all', reduce_all)
-    return layers.log(sum_out, name)
+    check_variable_and_dtype(x, 'x',
+                             ['float32', 'float64'],
+                             'logsumexp')
+    helper = LayerHelper('logsumexp', **locals())
+    attrs = {'dim': axis, 'keep_dim': keepdim, 'reduce_all': reduce_all}
+    out = helper.create_variable_for_type_inference(x.dtype)
+    helper.append_op(
+        type='logsumexp', inputs={'X': x}, outputs={'Out': out}, attrs=attrs)
+    return out
 def inverse(x, name=None):