# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
from paddle.proto.ParameterConfig_pb2 import ParameterConfig
from collections import OrderedDict
import paddle.trainer.config_parser as cp
import struct
import tarfile
import cStringIO
from topology import Topology

__all__ = ['Parameters', 'create']


def create(layers):
    """
    Create a parameter pool from a network topology.

    :param layers: the output layers of the network.
    :return: parameter pool created from the topology.
    :rtype: Parameters
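
    A minimal usage sketch (the fc layer here is illustrative):

    ..  code-block:: python

        out = paddle.layers.fc(...)
        parameters = paddle.parameters.create(out)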
    """
    topology = Topology(layers)
    pool = Parameters()
    initializers = cp.g_parameter_initializer_map
    for param in topology.proto().parameters:
        pool.__append_config__(param)
        if param.name in initializers:
            pool[param.name] = initializers[param.name](param.name)
    return pool


class Parameters(object):
    """
    `Parameters` manages all the learnable parameters in a neural network.
    It stores parameters' information in an OrderedDict: the key is
    the name of a parameter, and the value is that parameter's configuration
    (in protobuf format), such as its initialization mean and std, its size,
    whether it is a static parameter, and so on.

    :param __param_conf__: store the configurations of learnable parameters in
        the network in an OrderedDict. Parameters are added one by one into the
        dict following their creation order in the network: parameters of
        the previous layers in a network are created first. You can visit the
        parameters from bottom to top by iterating over this dict.
    :type __param_conf__: OrderedDict
    :param __gradient_machines__: all of the parameters in a neural network are
        appended to a PaddlePaddle gradient machine, which is used internally
        to copy parameter values between the C++ and Python ends.
    :type __gradient_machines__: list
    :param __tmp_params__: a dict to hold parameter values before any
        __gradient_machines__ is appended to `Parameters`.
    :type __tmp_params__: dict

    Basic usage is

    ..  code-block:: python

        data = paddle.layers.data(...)
        ...
        out = paddle.layers.fc(...)

        parameters = paddle.parameters.create(out)

        parameter_names = parameters.names()
        fc_mat = parameters.get('fc')
        print fc_mat
    """

    def __init__(self):
        self.__param_conf__ = OrderedDict()
        self.__gradient_machines__ = []
        self.__tmp_params__ = dict()

    def __append_config__(self, param_conf):
        """
        Append a parameter configuration. It is used to initialize Parameters and
        should be invoked only in paddle.parameters.create

        :param param_conf: The parameter configuration in protobuf
        :type param_conf: ParameterConfig
        :return: Nothing
        """

        if not isinstance(param_conf, ParameterConfig):
            raise ValueError("param_conf must be paddle.proto.ParameterConfig")

        if param_conf.name in self.__param_conf__:
            raise ValueError("duplicated parameter %s" % param_conf.name)

        self.__param_conf__[param_conf.name] = param_conf

    def update_param_conf(self, model_config):
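        """
        Update the stored parameter configurations from a model
        configuration (a protobuf message with a repeated `parameters`
        field, e.g. the ModelConfig produced by the config parser).

        :param model_config: model configuration protobuf message.
        :return: Nothing.
        """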
        for p in model_config.parameters:
            self.__param_conf__[p.name] = p

    def keys(self):
        """
        keys are the names of each parameter.

        :return: list of parameter names
        :rtype: list
        """
        return self.__param_conf__.keys()

    def names(self):
        """
        names of each parameter.

        :return: list of parameter names
        :rtype: list
        """
        return self.keys()

    def has_key(self, key):
        """
        has_key returns True if there is a parameter whose name equals key.

        :param key: Parameter name
        :type key: basestring
        :return: True if contains such key
        """
        return key in self.__param_conf__

    def __iter__(self):
        """
        Return an iterator of parameter name. It is used by `for loop`
        or `in` operator.

        ..  code-block:: python

            parameters = paddle.parameters.create(...)
            if "fc_param" in parameters:
                print 'OK'
        :return: an iterator of parameter name
        :rtype: iterator
        """
        return iter(self.__param_conf__)

    def __getter_inner(self, key, param_type):
        import py_paddle.swig_paddle as api
        shape = self.get_shape(key)

        if len(self.__gradient_machines__) == 0:
            # create new parameter in python numpy.
            if key in self.__tmp_params__:
                return self.__tmp_params__[key]
            else:
                return np.ndarray(shape=shape, dtype=np.float32)
        else:
            for each_gradient_machine in self.__gradient_machines__:
                param = __get_parameter_in_gradient_machine__(
                    each_gradient_machine, key)
                # To simplify the implementation, for now we always copy from C++
                assert isinstance(param, api.Parameter)
                val = param.getBuf(param_type)
                assert isinstance(val, api.Vector)
                val = val.copyToNumpyArray()
                return val
                # else continue

            raise RuntimeError("Unexpected branch")

    def __getitem__(self, key):
        """
        Get parameter by parameter name. It uses Python dict syntax.

        :note: It will always copy the parameter from C++ side.
        :param key: Parameter name
        :type key: basestring
        :return: parameter value
        :rtype: np.ndarray
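
        A minimal sketch (the parameter name 'fc.w0' is hypothetical):

        ..  code-block:: python

            mat = parameters['fc.w0']  # a NumPy copy of the parameter value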
        """
        import py_paddle.swig_paddle as api
        return self.__getter_inner(key, api.PARAMETER_VALUE)

    def get_shape(self, key):
        """
        get shape of the parameter.

        :param key: parameter name
        :type key: basestring
        :return: parameter's shape
        :rtype: tuple
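
        A minimal sketch (the parameter name 'fc.w0' is hypothetical):

        ..  code-block:: python

            shape = parameters.get_shape('fc.w0')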
        """
        if not isinstance(key, basestring):
            raise ValueError("parameter name should be string")
        if not self.has_key(key):
            raise ValueError("No such parameter %s" % key)
        conf = self.__param_conf__[key]
        dims = conf.dims if conf.dims else (1, conf.size)
        return tuple(map(int, dims))

    def __setitem__(self, key, value):
        """
        Set parameter by parameter name & value. It uses Python dict syntax.

        :note: It will always copy the parameter to C++ side.
        :param key: Parameter name
        :type key: basestring
        :param value: Parameter matrix.
        :type value: np.ndarray
        :return: Nothing
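
        A minimal sketch (the parameter name 'fc.w0' is hypothetical):

        ..  code-block:: python

            shape = parameters.get_shape('fc.w0')
            parameters['fc.w0'] = np.random.rand(*shape).astype(np.float32)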
        """

        if not isinstance(value, np.ndarray):
            raise ValueError("Must return ndarray")
        value = value.astype(dtype=np.float32)
        shape = self.get_shape(key)
        if value.shape != shape:
            raise ValueError("Value shape mismatch, expect %s, should %s" %
                             (shape, value.shape))

        if len(self.__gradient_machines__) == 0:
            self.__tmp_params__[key] = value
        else:
            for each_gradient_machine in self.__gradient_machines__:
                __copy_parameter_to_gradient_machine__(each_gradient_machine,
                                                       key, value)

    def get(self, parameter_name):
        """
        Get parameter by parameter name.

        :note: It will always copy the parameter from C++ side.
        :param parameter_name: parameter name
        :type parameter_name: basestring
        :return: The parameter matrix.
        :rtype: np.ndarray
        """
        return self.__getitem__(key=parameter_name)

    def get_grad(self, key):
        """
        Get gradient by parameter name.

        :note: It will always copy the gradient from C++ side.
        :param key: parameter name
        :type key: basestring
        :return: The gradient matrix.
        :rtype: np.ndarray
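
        A minimal sketch (the parameter name 'fc.w0' is hypothetical, and a
        gradient is only meaningful after a backward pass has run):

        ..  code-block:: python

            grad = parameters.get_grad('fc.w0')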
        """
        import py_paddle.swig_paddle as api
        if self.__param_conf__[key].is_static:
            return np.zeros(self.__param_conf__[key].size, dtype=np.float32)

        return self.__getter_inner(key, api.PARAMETER_GRADIENT)

    def set(self, parameter_name, value):
        """
        Set parameter by parameter name & matrix.

        :param parameter_name: parameter name
        :type parameter_name: basestring
        :param value: parameter matrix
        :type value: np.ndarray
        :return: Nothing.
        """
        self.__setitem__(key=parameter_name, value=value)

    def append_gradient_machine(self, gradient_machine):
        """
        Append a gradient machine to this Parameters object. This method is
        used internally in Trainer.train.

        :param gradient_machine: PaddlePaddle C++ GradientMachine object.
        :type gradient_machine: api.GradientMachine
        :return: Nothing.
        """
        import py_paddle.swig_paddle as api
        if not isinstance(gradient_machine, api.GradientMachine):
            raise ValueError("gradient_machine should be api.GradientMachine")

        if len(self.__tmp_params__) != 0:
            for name, val in self.__tmp_params__.iteritems():
                try:
                    __copy_parameter_to_gradient_machine__(gradient_machine,
                                                           name, val)
                except ValueError:
                    # If no such parameter in gradient machine, then don't copy
                    pass

        self.__gradient_machines__.append(gradient_machine)

    def serialize(self, name, f):
        """

        :param name:
        :param f:
        :type f: file
        :return:
        """
        param = self.get(name)
        size = reduce(lambda a, b: a * b, param.shape)
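        # 16-byte header (struct format "IIQ"): two uint32s holding 0 (format
        # version) and 4 (bytes per float32 value), then a uint64 holding the
        # number of values that follow.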
        f.write(struct.pack("IIQ", 0, 4, size))
        param = param.astype(np.float32)
        s = param.tostring()
        wrote_size = 0
        buf = buffer(s, wrote_size, 65535)
        while buf:  # f.write crashes with big data blobs.
            f.write(buf)
            wrote_size += 65535
            buf = buffer(s, wrote_size, 65535)

    def deserialize(self, name, f):
        """

        :param name:
        :param f:
        :type f: file
        :return:
        """
        f.read(16)  # skip the 16-byte header written by serialize
        arr = np.frombuffer(f.read(), dtype=np.float32)
        self.set(name, arr.reshape(self.get_shape(name)))

    def to_tar(self, f):
        """
        Save parameters to a tar file.

        WARNING: You should use `paddle.v2.trainer.SGD.save_parameter_to_tar(f)`
            to save parameters most of the time. Otherwise, some settings such
            as model average will not take effect.

        :param f: file object to write the tar archive to
        :type f: file
        :return: Nothing.
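
        A minimal sketch (the file name is illustrative):

        ..  code-block:: python

            with open('params.tar', 'w') as f:
                parameters.to_tar(f)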
        """
        tar = tarfile.TarFile(fileobj=f, mode='w')
        for nm in self.names():
            buf = cStringIO.StringIO()
            self.serialize(nm, buf)
            tarinfo = tarfile.TarInfo(name=nm)
            buf.seek(0)
            tarinfo.size = len(buf.getvalue())
            tar.addfile(tarinfo, buf)

            conf = self.__param_conf__[nm]
            confStr = conf.SerializeToString()
            tarinfo = tarfile.TarInfo(name="%s.protobuf" % nm)
            tarinfo.size = len(confStr)
            buf = cStringIO.StringIO(confStr)
            buf.seek(0)
            tar.addfile(tarinfo, fileobj=buf)

    @staticmethod
    def from_tar(f):
        """
        Create a `Parameters` object from the given file; the resulting
        `Parameters` contains only the parameters stored in that file.
        It assumes that the parameters in the file match those defined
        in the network. For example, it can be used for inference.

        :param f: the initialized model file.
        :type f: tar file
        :return: A Parameters object.
        :rtype: Parameters.
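
        A minimal sketch (the file name is illustrative):

        ..  code-block:: python

            with open('params.tar', 'r') as f:
                parameters = Parameters.from_tar(f)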
        """
        params = Parameters()
        tar = tarfile.TarFile(fileobj=f, mode='r')
        for finfo in tar:
            assert isinstance(finfo, tarfile.TarInfo)
            if finfo.name.endswith('.protobuf'):
                f = tar.extractfile(finfo)
                conf = ParameterConfig()
                conf.ParseFromString(f.read())
                params.__append_config__(conf)

        for param_name in params.names():
            f = tar.extractfile(param_name)
            params.deserialize(param_name, f)
        return params

    def init_from_tar(self, f, exclude_params=[]):
        """
        Different from `from_tar`, this interface can be used to initialize
        a subset of the network parameters from another saved model.

        :param f: the initialized model file.
        :type f: tar file
        :param exclude_params: the names of parameters that should not be
            initialized from the model file.
        :type exclude_params: list of strings
        :return: Nothing.
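
        A minimal sketch (the file and parameter names are illustrative):

        ..  code-block:: python

            with open('pretrained.tar', 'r') as f:
                parameters.init_from_tar(f, exclude_params=['fc.w0'])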
        """

        tar_param = Parameters.from_tar(f)
        for pname in tar_param.names():
            if pname in self.names() and pname not in exclude_params:
                self.set(pname, tar_param.get(pname))


def __get_parameter_in_gradient_machine__(gradient_machine, name):
    """

    Fetch the parameter `name` from the given gradient machine.

    :param gradient_machine: the gradient machine to search in.
    :type gradient_machine: api.GradientMachine
    :param name: parameter name.
    :return: the parameter with the given name.
    :rtype: api.Parameter
    """
    params = filter(lambda p: p.getName() == name,
                    gradient_machine.getParameters())

    if len(params) == 0:
        raise ValueError("No such parameter")
    elif len(params) > 1:
        raise ValueError("Unexpected branch")
    else:
        return params[0]


def __copy_parameter_to_gradient_machine__(gradient_machine, name, arr):
    """
    Copy a python ndarray into the gradient machine.

    :param gradient_machine: the gradient machine to copy into.
    :type gradient_machine: api.GradientMachine
    :param name: parameter name.
    :param arr: parameter value to copy.
    :type arr: np.ndarray
    :return: Nothing.
    """
    import py_paddle.swig_paddle as api
    param = __get_parameter_in_gradient_machine__(gradient_machine, name)
    vec = param.getBuf(api.PARAMETER_VALUE)
    assert isinstance(vec, api.Vector)
    vec.copyFromNumpyArray(arr.flatten())