#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Wrappers for fluid.layers. They make it easy to share parameters between layers.

Here is an example:
    ```python
    import parl.layers as layers

    class MLPModel(Model):
        def __init__(self):
            self.fc = layers.fc(size=64) # automatically creates parameter names "fc_0.w" and "fc_0.b"

        def policy1(self, obs):
            out = self.fc(obs) # actually creates the parameters named "fc_0.w" and "fc_0.b"

        def policy2(self, obs):
            out = self.fc(obs) # reuses the same parameters
    ```
"""

import inspect
import paddle.fluid.layers as layers
import paddle.fluid.unique_name as unique_name
import paddle.fluid as fluid
import six
from copy import deepcopy
from paddle.fluid.executor import _fetch_var
from paddle.fluid.framework import Variable
from paddle.fluid.layers import *
from paddle.fluid.param_attr import ParamAttr
from parl.layers.attr_holder import AttrHolder


def update_attr_name(name, default_name, attr, is_bias):
    """
    Update the name in an attribute.
    1. If the user provides a name, generate the candidate name from the
       provided name;
    2. otherwise generate the candidate name from the default name (which
       should be the name of the layer wrapper).
    3. With the candidate name in hand: if attr is False, return False
       (the parameter is not used);
    4. if attr is None or attr.name is None, set the attr's name to the
       candidate name;
    5. otherwise keep attr's existing name and ignore the candidate name.
    """

    def check_or_replace_name(name, attr):
        ## if this parameter is not used
        if attr == False:
            return False

        if attr is None:
            return ParamAttr(name=name)

        if attr.name is None:
            attr.name = name
        return attr

    name = (default_name if name is None else name)
    suffix = "b" if is_bias else "w"
    new_name = unique_name.generate(name + "." + suffix)
    return check_or_replace_name(new_name, attr)
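
# A rough sketch of what update_attr_name produces (the exact numeric suffix
# comes from unique_name.generate and depends on how many names were issued
# before; the inputs below are illustrative only):
#
#     update_attr_name(None, "fc", None, is_bias=False)
#         # -> ParamAttr(name="fc.w_0") or similar
#     update_attr_name("actor", "fc", ParamAttr(), is_bias=True)
#         # -> the same ParamAttr with its name set to something like "actor.b_0"
#     update_attr_name(None, "fc", False, is_bias=True)
#         # -> False, meaning the bias parameter is not used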


class LayerFunc(object):
    def __init__(self, attr_holder):
        self.attr_holder = attr_holder

    def __deepcopy__(self, memo):
        cls = self.__class__
        ## __new__ won't initialize the instance, so we do that ourselves
        copied = cls.__new__(cls)
        ## record in the memo that self has been copied to avoid recursive copying
        memo[id(self)] = copied

        ## first copy all content
        for k, v in six.iteritems(self.__dict__):
            setattr(copied, k, deepcopy(v, memo))

        ## then create new parameter names for every attr in self.attr_holder
        def create_new_para_name(attr):
            if attr:
                assert attr.name, "attr should have a name already!"
                name_key = 'PARL_target_' + attr.name
                attr.name = unique_name.generate(name_key)

        for attr in copied.attr_holder.tolist():
            create_new_para_name(attr)

        ## We require the user to sync the parameter values later, because
        ## this deepcopy is supposed to be called only before the startup
        ## program runs. It changes the computation graph, so it must not be
        ## called during execution.
        return copied

    @property
    def param_name(self):
        if self.attr_holder.param_attr:
            return self.attr_holder.param_attr.name
        else:
            return None

    @property
    def bias_name(self):
        if self.attr_holder.bias_attr:
            return self.attr_holder.bias_attr.name
        else:
            return None

    @property
    def all_params_names(self):
        params_names = []
        for attr in self.attr_holder.tolist():
            if attr:
                params_names.append(attr.name)
        return params_names
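
# A rough sketch of why LayerFunc defines __deepcopy__: copying a wrapper gives
# the copy freshly generated parameter names (prefixed with "PARL_target_"), so
# the copy no longer shares parameters with the original. This is how target
# networks get their own parameters; the values must still be synced by the
# caller after the startup program has run. For example (illustrative names):
#
#     fc_layer = fc(size=64)           # e.g. parameters "fc.w_0" / "fc.b_0"
#     target_fc = deepcopy(fc_layer)   # renamed to something like "PARL_target_fc.w_0_0", ...
#     assert fc_layer.all_params_names != target_fc.all_params_names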


def fc(size,
       num_flatten_dims=1,
       param_attr=None,
       bias_attr=None,
       act=None,
       name=None):
    """
    Return a function that creates a paddle.fluid.layers.fc.
    """
    default_name = "fc"
    param_attr = update_attr_name(name, default_name, param_attr, False)
    bias_attr = update_attr_name(name, default_name, bias_attr, True)

    class FC_(LayerFunc):
        def __init__(self):
            super(FC_, self).__init__(
                AttrHolder(param_attr=param_attr, bias_attr=bias_attr))

        def __call__(self, input, is_test=False):
            return layers.fc(
                input=input,
                size=size,
                num_flatten_dims=num_flatten_dims,
                param_attr=self.attr_holder.param_attr,
                bias_attr=self.attr_holder.bias_attr,
                act=act,
                is_test=is_test)

    return FC_()
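
# A minimal usage sketch for the fc wrapper; it assumes `obs` is an input
# variable created elsewhere (e.g. via layers.data) inside a fluid program:
#
#     fc_layer = fc(size=64, act='relu')  # only reserves the parameter names
#     out1 = fc_layer(obs)                # the parameters are created at the first call
#     out2 = fc_layer(obs)                # later calls reuse the same parameters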


def embedding(size,
              is_sparse=False,
              is_distributed=False,
              padding_idx=None,
              param_attr=None,
              dtype="float32",
              name=None):
    """
    Return a function that creates a paddle.fluid.layers.embedding.
    """
    param_attr = update_attr_name(name, "embedding", param_attr, False)

    class Embedding_(LayerFunc):
        def __init__(self):
            super(Embedding_, self).__init__(AttrHolder(param_attr=param_attr))

        def __call__(self, input):
            return layers.embedding(
                input=input,
                size=size,
                is_sparse=is_sparse,
                is_distributed=is_distributed,
                padding_idx=padding_idx,
                param_attr=self.attr_holder.param_attr,
                dtype=dtype)

    return Embedding_()


def dynamic_lstm(size,
                 param_attr=None,
                 bias_attr=None,
                 use_peepholes=True,
                 is_reverse=False,
                 gate_activation="sigmoid",
                 cell_activation="tanh",
                 candidate_activation="tanh",
                 dtype="float32",
                 name=None):
    """
    Return a function that creates a paddle.fluid.layers.dynamic_lstm.
    """
    default_name = "dynamic_lstm"
    param_attr = update_attr_name(name, default_name, param_attr, False)
    bias_attr = update_attr_name(name, default_name, bias_attr, True)

    class DynamicLstm_(LayerFunc):
        def __init__(self):
            super(DynamicLstm_, self).__init__(
                AttrHolder(param_attr=param_attr, bias_attr=bias_attr))

        def __call__(self, input, h_0=None, c_0=None):
            return layers.dynamic_lstm(
                input=input,
                h_0=h_0,
                c_0=c_0,
                size=size,
                param_attr=self.attr_holder.param_attr,
                bias_attr=self.attr_holder.bias_attr,
                use_peepholes=use_peepholes,
                is_reverse=is_reverse,
                gate_activation=gate_activation,
                cell_activation=cell_activation,
                candidate_activation=candidate_activation,
                dtype=dtype)

    return DynamicLstm_()


def dynamic_lstmp(size,
                  proj_size,
                  param_attr=None,
                  bias_attr=None,
                  use_peepholes=True,
                  is_reverse=False,
                  gate_activation='sigmoid',
                  cell_activation='tanh',
                  candidate_activation='tanh',
                  proj_activation='tanh',
                  dtype='float32',
                  name=None):
    """
    Return a function that creates a paddle.fluid.layers.dynamic_lstmp.
    """
    default_name = "dynamic_lstmp"
    param_attr = update_attr_name(name, default_name, param_attr, False)
    bias_attr = update_attr_name(name, default_name, bias_attr, True)

    class DynamicLstmp_(LayerFunc):
        def __init__(self):
            super(DynamicLstmp_, self).__init__(
                AttrHolder(param_attr=param_attr, bias_attr=bias_attr))

        def __call__(self, input):
            return layers.dynamic_lstmp(
                input=input,
                size=size,
                proj_size=proj_size,
                param_attr=self.attr_holder.param_attr,
                bias_attr=self.attr_holder.bias_attr,
                use_peepholes=use_peepholes,
                is_reverse=is_reverse,
                gate_activation=gate_activation,
                cell_activation=cell_activation,
                candidate_activation=candidate_activation,
                proj_activation=proj_activation,
                dtype=dtype)

    return DynamicLstmp_()


def dynamic_gru(size,
                param_attr=None,
                bias_attr=None,
                is_reverse=False,
                gate_activation='sigmoid',
                candidate_activation='tanh',
                name=None):
    """
    Return a function that creates a paddle.fluid.layers.dynamic_gru.
    """
    default_name = "dynamic_gru"
    param_attr = update_attr_name(name, default_name, param_attr, False)
    bias_attr = update_attr_name(name, default_name, bias_attr, True)

    class DynamicGru_(LayerFunc):
        def __init__(self):
            super(DynamicGru_, self).__init__(
                AttrHolder(param_attr=param_attr, bias_attr=bias_attr))

        def __call__(self, input, h_0=None):
            return layers.dynamic_gru(
                input=input,
                size=size,
                param_attr=self.attr_holder.param_attr,
                bias_attr=self.attr_holder.bias_attr,
                is_reverse=is_reverse,
                gate_activation=gate_activation,
                candidate_activation=candidate_activation,
                h_0=h_0)

    return DynamicGru_()


def gru_unit(**kwargs):
    """
    We cannot pass param_attr or bias_attr to paddle.fluid.layers.gru_unit yet.
    """
    raise NotImplementedError()


def linear_chain_crf(**kwargs):
    raise NotImplementedError()


def crf_decoding(**kwargs):
    raise NotImplementedError()


def sequence_conv(num_filters,
                  filter_size=3,
                  filter_stride=1,
                  padding=None,
                  bias_attr=None,
                  param_attr=None,
                  act=None,
                  name=None):
    """
    Return a function that creates a paddle.fluid.layers.sequence_conv.
    """
    default_name = "sequence_conv"
    param_attr = update_attr_name(name, default_name, param_attr, False)
    bias_attr = update_attr_name(name, default_name, bias_attr, True)

    class SequenceConv_(LayerFunc):
        def __init__(self):
            super(SequenceConv_, self).__init__(
                AttrHolder(param_attr=param_attr, bias_attr=bias_attr))

        def __call__(self, input):
            return layers.sequence_conv(
                input=input,
                num_filters=num_filters,
                filter_size=filter_size,
                filter_stride=filter_stride,
                padding=padding,
                bias_attr=self.attr_holder.bias_attr,
                param_attr=self.attr_holder.param_attr,
                act=act)

    return SequenceConv_()


def conv2d(num_filters,
           filter_size,
           stride=1,
           padding=0,
           dilation=1,
           groups=None,
           param_attr=None,
           bias_attr=None,
           use_cudnn=True,
           act=None,
           name=None):
    """
    Return a function that creates a paddle.fluid.layers.conv2d.
    """
    default_name = "conv2d"
    param_attr = update_attr_name(name, default_name, param_attr, False)
    bias_attr = update_attr_name(name, default_name, bias_attr, True)

    class Conv2D_(LayerFunc):
        def __init__(self):
            super(Conv2D_, self).__init__(
                AttrHolder(param_attr=param_attr, bias_attr=bias_attr))

        def __call__(self, input):
            return layers.conv2d(
                input=input,
                num_filters=num_filters,
                filter_size=filter_size,
                stride=stride,
                padding=padding,
                dilation=dilation,
                groups=groups,
                param_attr=self.attr_holder.param_attr,
                bias_attr=self.attr_holder.bias_attr,
                use_cudnn=use_cudnn,
                act=act)

    return Conv2D_()
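
# A minimal usage sketch for the conv2d wrapper; it assumes `image1` and
# `image2` are NCHW input variables created elsewhere (e.g. via layers.data):
#
#     conv = conv2d(num_filters=32, filter_size=4, stride=2, act='relu')
#     feat1 = conv(image1)   # creates the filter and bias parameters
#     feat2 = conv(image2)   # applies the same filters to another input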


def conv2d_transpose(num_filters,
                     output_size=None,
                     filter_size=None,
                     padding=0,
                     stride=1,
                     dilation=1,
                     param_attr=None,
                     bias_attr=None,
                     use_cudnn=True,
                     act=None,
                     name=None):
    """
    Return a function that creates a paddle.fluid.layers.conv2d_transpose.
    """
    default_name = "conv2d_transpose"
    param_attr = update_attr_name(name, default_name, param_attr, False)
    bias_attr = update_attr_name(name, default_name, bias_attr, True)

    class Conv2DTranspose_(LayerFunc):
        def __init__(self):
            super(Conv2DTranspose_, self).__init__(
                AttrHolder(param_attr=param_attr, bias_attr=bias_attr))

        def __call__(self, input):
            return layers.conv2d_transpose(
                input=input,
                num_filters=num_filters,
                output_size=output_size,
                filter_size=filter_size,
                padding=padding,
                stride=stride,
                dilation=dilation,
                param_attr=self.attr_holder.param_attr,
                bias_attr=self.attr_holder.bias_attr,
                use_cudnn=use_cudnn,
                act=act)

    return Conv2DTranspose_()


def lstm_unit(forget_bias=0.0, param_attr=None, bias_attr=None, name=None):
    """
    Return a function that creates a paddle.fluid.layers.lstm_unit.
    """
    default_name = "lstm_unit"
    param_attr = update_attr_name(name, default_name, param_attr, False)
    bias_attr = update_attr_name(name, default_name, bias_attr, True)

    class LstmUnit_(LayerFunc):
        def __init__(self):
            super(LstmUnit_, self).__init__(
                AttrHolder(param_attr=param_attr, bias_attr=bias_attr))

        def __call__(self, x_t, hidden_t_prev, cell_t_prev):
            return layers.lstm_unit(
                x_t=x_t,
                hidden_t_prev=hidden_t_prev,
                cell_t_prev=cell_t_prev,
                forget_bias=forget_bias,
                param_attr=self.attr_holder.param_attr,
                bias_attr=self.attr_holder.bias_attr)

    return LstmUnit_()


def nce(**kwargs):
    raise NotImplementedError()


def row_conv(future_context_size, param_attr=None, act=None, name=None):
    """
    Return a function that creates a paddle.fluid.layers.row_conv.
    """
    param_attr = update_attr_name(name, "row_conv", param_attr, False)

    class RowConv_(LayerFunc):
        def __init__(self):
            super(RowConv_, self).__init__(AttrHolder(param_attr=param_attr))

        def __call__(self, input):
            return layers.row_conv(
                input=input,
                future_context_size=future_context_size,
                param_attr=self.attr_holder.param_attr,
                act=act)

    return RowConv_()


def layer_norm(**kwargs):
    raise NotImplementedError()


def batch_norm(act=None,
               momentum=0.9,
               epsilon=1e-05,
               param_attr=None,
               bias_attr=None,
               data_layout='NCHW',
               in_place=False,
               name=None,
               moving_mean_name=None,
               moving_variance_name=None,
               do_model_average_for_mean_and_var=False,
               fuse_with_relu=False):
    """
    Return a function that creates a paddle.fluid.layers.batch_norm.

    """
    default_name = "batch_norm"
    param_attr = update_attr_name(name, default_name, param_attr, False)
    bias_attr = update_attr_name(name, default_name, bias_attr, True)
    moving_mean_attr = update_attr_name(name, default_name + "_moving_mean",
                                        None, False)
    moving_variance_attr = update_attr_name(
        name, default_name + "_moving_variance", None, False)

    class BatchNorm_(LayerFunc):
        def __init__(self):
            super(BatchNorm_, self).__init__(
                AttrHolder(
                    param_attr=param_attr,
                    bias_attr=bias_attr,
                    moving_mean_attr=moving_mean_attr,
                    moving_variance_attr=moving_variance_attr))

        def __call__(self, input, is_test=False):
            return layers.batch_norm(
                input=input,
                act=act,
                is_test=is_test,
                momentum=momentum,
                epsilon=epsilon,
                param_attr=self.attr_holder.param_attr,
                bias_attr=self.attr_holder.bias_attr,
                data_layout=data_layout,
                in_place=in_place,
                name=name,
                moving_mean_name=self.attr_holder.moving_mean_attr.name,
                moving_variance_name=self.attr_holder.moving_variance_attr.
                name,
                do_model_average_for_mean_and_var=
                do_model_average_for_mean_and_var,
                fuse_with_relu=fuse_with_relu)

    return BatchNorm_()
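
# A minimal usage sketch for the batch_norm wrapper; it assumes `conv_out` is a
# variable created elsewhere. Besides the scale/shift parameters, the wrapper
# also reserves names for the moving mean and variance, so a deepcopy of the
# layer gets its own running statistics as well:
#
#     bn = batch_norm(act='relu')
#     out = bn(conv_out)                      # training graph (is_test=False)
#     out_eval = bn(conv_out, is_test=True)   # inference graph, same parameters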


def create_parameter(shape,
                     dtype,
                     name=None,
                     attr=None,
                     is_bias=False,
                     default_initializer=None):
    """
    Return a function that creates a paddle.fluid.layers.create_parameter.

    """
    param_attr = update_attr_name(name, "create_parameter", attr, False)

    class CreateParameter_(LayerFunc):
        def __init__(self):
            super(CreateParameter_, self).__init__(
                AttrHolder(param_attr=param_attr))

        def __call__(self):
            return layers.create_parameter(
                shape=shape,
                dtype=dtype,
                attr=self.attr_holder.param_attr,
                is_bias=is_bias,
                default_initializer=default_initializer)

    return CreateParameter_()
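
# A minimal usage sketch for the create_parameter wrapper; as with the other
# wrappers, the fluid variable is only created when the returned object is
# called, not when the wrapper itself is constructed:
#
#     w = create_parameter(shape=[64, 10], dtype='float32')
#     w_var = w()   # creates the parameter, named e.g. "create_parameter.w_0"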