extension.py 14.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
# TODO: define the extension functions
16

L
Li Fuchen 已提交
17
import numpy as np
18 19 20

from paddle import _C_ops, _legacy_C_ops, in_dynamic_mode

21
from ...common_ops_import import Variable
22 23 24 25 26
from ...fluid.data_feeder import (
    check_dtype,
    check_type,
    check_variable_and_dtype,
)
姜永久 已提交
27
from ...fluid.framework import in_dygraph_mode
28 29 30
from ...fluid.layer_helper import LayerHelper
from ...framework import convert_np_dtype_to_dtype_, core
from ...tensor.creation import assign
31

32 33
__all__ = []

34

L
Li Fuchen 已提交
35 36
def diag_embed(input, offset=0, dim1=-2, dim2=-1):
    """
    Creates a tensor whose diagonals of certain 2D planes (specified by dim1 and dim2)
    are filled by ``input``. By default, a 2D plane formed by the last two dimensions
    of the returned tensor will be selected.

    The argument ``offset`` determines which diagonal is generated:

    - If offset = 0, it is the main diagonal.
    - If offset > 0, it is above the main diagonal.
    - If offset < 0, it is below the main diagonal.

    Args:
        input(Tensor|numpy.ndarray): The input tensor. Must be at least 1-dimensional. The input data type should be float32, float64, int32, int64.
        offset(int, optional): Which diagonal to consider. Default: 0 (main diagonal).
        dim1(int, optional): The first dimension with respect to which to take diagonal. Default: -2.
        dim2(int, optional): The second dimension with respect to which to take diagonal. Default: -1.

    Returns:
        Tensor, the output data type is the same as input data type.

    Raises:
        AssertionError: In static-graph mode, if ``input`` has no dimensions, if
            ``dim1`` or ``dim2`` lies outside ``[-(rank + 1), rank]`` (``rank``
            being the number of dimensions of ``input``), or if ``dim1`` and
            ``dim2`` refer to the same dimension.

    Examples:
        .. code-block:: python

            import paddle
            import paddle.nn.functional as F

            diag_embed_input = paddle.arange(6)

            diag_embed_output1 = F.diag_embed(diag_embed_input)
            print(diag_embed_output1)
            # Tensor(shape=[6, 6], dtype=int64, place=Place(cpu), stop_gradient=True,
            #        [[0, 0, 0, 0, 0, 0],
            #         [0, 1, 0, 0, 0, 0],
            #         [0, 0, 2, 0, 0, 0],
            #         [0, 0, 0, 3, 0, 0],
            #         [0, 0, 0, 0, 4, 0],
            #         [0, 0, 0, 0, 0, 5]])

            diag_embed_output2 = F.diag_embed(diag_embed_input, offset=-1, dim1=0,dim2=1 )
            print(diag_embed_output2)
            # Tensor(shape=[7, 7], dtype=int64, place=Place(cpu), stop_gradient=True,
            #        [[0, 0, 0, 0, 0, 0, 0],
            #         [0, 0, 0, 0, 0, 0, 0],
            #         [0, 1, 0, 0, 0, 0, 0],
            #         [0, 0, 2, 0, 0, 0, 0],
            #         [0, 0, 0, 3, 0, 0, 0],
            #         [0, 0, 0, 0, 4, 0, 0],
            #         [0, 0, 0, 0, 0, 5, 0]])

            diag_embed_input_2dim = paddle.reshape(diag_embed_input,[2,3])
            print(diag_embed_input_2dim)
            # Tensor(shape=[2, 3], dtype=int64, place=Place(cpu), stop_gradient=True,
            #        [[0, 1, 2],
            #         [3, 4, 5]])
            diag_embed_output3 = F.diag_embed(diag_embed_input_2dim,offset= 0, dim1=0, dim2=2 )
            print(diag_embed_output3)
            # Tensor(shape=[3, 2, 3], dtype=int64, place=Place(cpu), stop_gradient=True,
            #        [[[0, 0, 0],
            #          [3, 0, 0]],

            #         [[0, 1, 0],
            #          [0, 4, 0]],

            #         [[0, 0, 2],
            #          [0, 0, 5]]])
    """
    # Anything that is not already a Variable (e.g. a numpy array) is converted
    # through ``assign`` first.
    if not isinstance(input, Variable):
        input = assign(input)

    # Dynamic-graph execution: dispatch straight to the C++ op and return.
    if in_dygraph_mode():
        return _C_ops.diag_embed(input, offset, dim1, dim2)
    elif in_dynamic_mode():
        return _legacy_C_ops.diag_embed(
            input, "offset", offset, "dim1", dim1, "dim2", dim2
        )

    inputs = {'Input': [input]}
    attrs = {'offset': offset, 'dim1': dim1, 'dim2': dim2}

    def __check_input(input, offset, dim1, dim2):
        # Validate dtype and the dim1/dim2 arguments before the op is appended.
        check_dtype(
            input.dtype,
            'Input',
            ['int32', 'int64', 'float16', 'float32', 'float64'],
            'diag_embed',
        )

        input_shape = list(input.shape)
        rank = len(input_shape)
        assert rank >= 1, (
            "Input must be at least 1-dimensional, "
            "But received Input's dimensional: %s.\n" % rank
        )

        # Negative dims are normalised below with ``rank + dim + 1``, so the
        # valid range is [-(rank + 1), rank], exactly as the messages state.
        # The previous ``abs(dim) <= rank`` test wrongly rejected -(rank + 1).
        assert -(rank + 1) <= dim1 <= rank, (
            "Dim1 is out of range (expected to be in range of [%d, %d], but got %d).\n"
            % (-(rank + 1), rank, dim1)
        )

        assert -(rank + 1) <= dim2 <= rank, (
            "Dim2 is out of range (expected to be in range of [%d, %d], but got %d).\n"
            % (-(rank + 1), rank, dim2)
        )

        # Normalise negative dims so that e.g. -1 and ``rank`` compare equal.
        dim1_ = dim1 if dim1 >= 0 else rank + dim1 + 1
        dim2_ = dim2 if dim2 >= 0 else rank + dim2 + 1
        assert dim1_ != dim2_, (
            "dim1 and dim2 cannot be the same dimension."
            "But received dim1 = %d, dim2 = %d\n" % (dim1, dim2)
        )

    __check_input(input, offset, dim1, dim2)
    helper = LayerHelper("diag_embed", **locals())

    out = helper.create_variable_for_type_inference(dtype=input.dtype)

    # Reuse the dicts built above instead of spelling them out a second time.
    helper.append_op(
        type='diag_embed',
        inputs=inputs,
        attrs=attrs,
        outputs={'Out': [out]},
    )
    out.stop_gradient = True
    return out
159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199


def sequence_mask(x, maxlen=None, dtype='int64', name=None):
    r"""
    **SequenceMask Layer**

    Produce a mask of data type :code:`dtype` from the lengths held in
    :code:`x`, padded out to :code:`maxlen` positions.

    If :code:`x` is a Tensor with shape [d_1, d_2, ..., d_n], the result
    :code:`y` has shape [d_1, d_2, ..., d_n, maxlen], where:

    .. math::

        y(i_1, i_2,..., i_n, j) = (j < x(i_1, i_2,..., i_n))

    .. code-block:: text

        Case:

        Consider input:
            x = [3, 1, 1, 0]    max_len = 4

        then we get out:
            mask = [[1, 1, 1, 0],
                    [1, 0, 0, 0],
                    [1, 0, 0, 0],
                    [0, 0, 0, 0]]

    Args:
        x (Variable): Input tensor of sequence_mask layer, whose elements are
            integers less than :code:`maxlen`. Tensor or LodTensor with shape
            [d_1, d_2, ..., d_n].
        maxlen (int, optional): Maximum length of the sequence. If
            :code:`maxlen` is None, it is replaced with :math:`max(x)`.
        dtype (np.dtype|paddle.dtype|str, optional): Data type of the output,
            ``int64`` by default.
        name(str, optional): For detailed information, please refer to
            :ref:`api_guide_Name`. Usually name is no need to set and None by
            default.

    Returns:
        Tensor, the output sequence mask, with shape
        [d_1, d_2, ..., d_n, maxlen] and data type :code:`dtype`. The data
        type should be bool, float32, float64, int8, int32 or int64.

    Examples:
        .. code-block:: python

            import paddle

            lengths = paddle.to_tensor([10, 9, 8])
            mask = paddle.nn.functional.sequence_mask(lengths)

            print(mask)
            # Tensor(shape=[3, 10], dtype=int64, place=Place(gpu:0), stop_gradient=True,
            #        [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            #         [1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
            #         [1, 1, 1, 1, 1, 1, 1, 1, 0, 0]])

    """

    if in_dygraph_mode():
        if not isinstance(dtype, core.VarDesc.VarType):
            dtype = convert_np_dtype_to_dtype_(dtype)
        if maxlen is not None:
            if isinstance(maxlen, core.eager.Tensor):
                # A tensor maxlen is passed as the op's second input.
                mask = _legacy_C_ops.sequence_mask(
                    x, maxlen, 'out_dtype', dtype
                )
            else:
                # A plain maxlen travels as an attribute instead.
                mask = _legacy_C_ops.sequence_mask(
                    x, None, 'out_dtype', dtype, 'maxlen', maxlen
                )
            mask.stop_gradient = True
            return mask
        # NOTE: with maxlen=None there is no early return here; execution
        # continues into the LayerHelper path below.

    helper = LayerHelper('sequence_mask', **locals())
    out = helper.create_variable_for_type_inference(dtype=dtype)

    op_inputs = {'X': [x]}
    op_attrs = {'out_dtype': out.dtype}
    if isinstance(maxlen, Variable):
        op_inputs['MaxLenTensor'] = maxlen
    elif maxlen is not None:
        op_attrs['maxlen'] = maxlen

    helper.append_op(
        type='sequence_mask',
        inputs=op_inputs,
        outputs={'Y': out},
        attrs=op_attrs,
    )

    out.stop_gradient = True
    return out


def gather_tree(ids, parents):
    r"""
    Backtrace beam-search results into full sequences.

    To be used after beam search. After beam search, we get selected ids at
    each time step and the corresponding parents in the search tree. Both ids
    and parents have the layout :attr:`[max_time, batch_size, beam_size]`. Then
    :attr:`gather_tree` is used to backtrace from the last time step and
    generate the full sequences by collecting selected ids.

    Here is an example:

    .. code-block:: text

            Given:
                ids = [[[2 2]
                        [6 1]]
                       [[3 9]
                        [6 1]]
                       [[0 1]
                        [9 0]]]
                parents = [[[0 0]
                            [1 1]]
                           [[1 0]
                            [1 0]]
                           [[0 0]
                            [0 1]]]

            Then:
                gather_tree(ids, parents)
                         = [[[2 2]
                             [1 6]]
                            [[3 3]
                             [6 1]]
                            [[0 1]
                             [9 0]]]

    Args:
        ids(Tensor): A Tensor with shape :attr:`[length, batch_size, beam_size]`
            and data type :attr:`int32` or :attr:`int64`. It contains the selected
            ids of all time steps.
        parents(Tensor): A Tensor with the same shape and data type as :attr:`ids`,
            It contains the parents corresponding to selected ids when searching
            among beams.

    Returns:
            A Tensor with the same shape and data type as :attr:`ids`. \
            It contains the full sequences. The sequences are collected from \
            :attr:`ids` by backtracing according to :attr:`parents`.

    Raises:
        ValueError: If :attr:`ids` is not 3-dimensional, or if :attr:`parents`
            has a different number of dimensions than :attr:`ids`.

    Examples:
        .. code-block:: python

            import paddle

            ids = paddle.to_tensor([[[2, 2], [6, 1]], [[3, 9], [6, 1]], [[0, 1], [9, 0]]])

            parents = paddle.to_tensor([[[0, 0], [1, 1]], [[1, 0], [1, 0]], [[0, 0], [0, 1]]])

            final_sequences = paddle.nn.functional.gather_tree(ids, parents)
            # [[[2, 2], [1, 6]], [[3, 3], [6, 1]], [[0, 1], [9, 0]]]

    """
    if ids.ndim != 3:
        raise ValueError(
            "The input ids must be a 3D tensor with shape [length, batch_size, beam_size]"
        )
    # Only the number of dimensions is compared here, not the full shape.
    if parents.ndim != ids.ndim:
        raise ValueError("The ids's shape must be the same as parents' shape. ")

    if in_dygraph_mode():
        return _C_ops.gather_tree(ids, parents)

    # Static-graph path: create the helper first, then validate both inputs.
    helper = LayerHelper('gather_tree', **locals())
    for tensor, arg_name in ((ids, 'ids'), (parents, 'parents')):
        check_variable_and_dtype(
            tensor, arg_name, ['int32', 'int64'], 'gather_tree'
        )

    sequences = helper.create_variable_for_type_inference(dtype=ids.dtype)
    helper.append_op(
        type="gather_tree",
        inputs={"Ids": ids, "Parents": parents},
        outputs={"Out": sequences},
    )
    return sequences
338 339 340 341 342 343 344


def temporal_shift(x, seg_num, shift_ratio=0.25, name=None, data_format="NCHW"):
    """

    **Temporal Shift Operator**

    Calculate the temporal shifting features for Input(X).

    Input(X) should be in shape of [N*T, C, H, W] or [N*T, H, W, C], where
    N is the batch size, T is the temporal segment number specified by
    :attr:`seg_num`, C is the channel number, and H and W are the height and
    width of features.

    Temporal Shifting is calculated as follows when data format is NCHW:

    Step 1: Reshape Input(X) to [N, T, C, H, W].

    Step 2: Pad 0 to reshaping result in the 2nd(T) dimension with
    padding width as 1 on each side, padding result will be in shape
    of [N, T+2, C, H, W].

    Step 3: Assume :attr:`shift_ratio` is :math:`1/4`, slice padding
    result as follows:

    .. code-block:: text

        slice1 = x[:, :T, :C/4, :, :]
        slice2 = x[:, 2:T+2, C/4:C/2, :, :]
        slice3 = x[:, 1:T+1, C/2:, :, :]

    Step 4: Concatenate three slices along the 3rd(C) dimension and
    reshape result to [N*T, C, H, W].

    For details of temporal shifting, please refer to paper:
    `Temporal Shift Module <http://arxiv.org/abs/1811.08383>`_ .

    Args:
        x(Tensor): The input tensor, in shape [N*T, C, H, W] or [N*T, H, W, C]
            depending on :attr:`data_format`. In static-graph mode the data
            type is checked to be float16, float32 or float64.
        seg_num(int): The temporal segment number T.
        shift_ratio(float, optional): The ratio that determines how many
            channels are taken for each shifted slice (see Step 3).
            Default: 0.25.
        name(str, optional): For detailed information, please refer
                             to :ref:`api_guide_Name`. Usually name is no need to set and
                             None by default.
        data_format(str, optional): Data format that specifies the layout of input.
            It can be "NCHW" or "NHWC". Default: "NCHW".

    Returns:
        out(Tensor): The temporal shifting result is a tensor with the
        same shape and same data type as the input.

    Raises:
        ValueError: If :attr:`data_format` is neither "NCHW" nor "NHWC".
        TypeError: In static-graph mode, if :attr:`seg_num` is not an int.

    Examples:
        .. code-block:: python

            import paddle
            import paddle.nn.functional as F

            input = paddle.randn([6, 4, 2, 2])
            out = F.temporal_shift(x=input, seg_num=2, shift_ratio=0.2)
    """
    if data_format not in ("NCHW", "NHWC"):
        raise ValueError(
            "Attr(data_format) should be 'NCHW' or 'NHWC'. "
            "Received Attr(data_format): {}.".format(data_format)
        )

    if in_dygraph_mode():
        return _C_ops.temporal_shift(x, seg_num, shift_ratio, data_format)

    # Static-graph path: validate the arguments, then append the op.
    helper = LayerHelper("temporal_shift", **locals())
    check_variable_and_dtype(
        x, 'x', ['float16', 'float32', 'float64'], 'temporal_shift'
    )
    check_type(seg_num, 'seg_num', int, 'temporal_shift')
    check_type(shift_ratio, 'shift_ratio', float, 'temporal_shift')

    shifted = helper.create_variable_for_type_inference(dtype=x.dtype)

    # Explicit int check kept in addition to check_type above.
    if not isinstance(seg_num, int):
        raise TypeError("seg_num must be int type.")

    op_attrs = {
        "seg_num": seg_num,
        "shift_ratio": shift_ratio,
        "data_format": data_format,
    }
    helper.append_op(
        type="temporal_shift",
        inputs={"X": x},
        outputs={"Out": shifted},
        attrs=op_attrs,
    )
    return shifted