diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py
index 73135dfafbb86f857fb32e633ea2efddca873035..060c62e9ec0c1c4c028ffbf5da4240b49a3de35b 100755
--- a/python/paddle/__init__.py
+++ b/python/paddle/__init__.py
@@ -36,7 +36,7 @@ from .framework import disable_static # noqa: F401
 from .framework import enable_static # noqa: F401
 from .framework import in_dynamic_mode # noqa: F401
 from .fluid.dataset import * # noqa: F401
-from .fluid.lazy_init import LazyInit # noqa: F401
+from .fluid.lazy_init import LazyGuard # noqa: F401
 
 from .framework.dtype import dtype as dtype # noqa: F401
 from .framework.dtype import uint8 # noqa: F401
@@ -417,7 +417,7 @@ __all__ = [ # noqa
     'cumprod',
     'logcumsumexp',
     'logit',
-    'LazyInit',
+    'LazyGuard',
     'sign',
     'is_empty',
     'equal',
diff --git a/python/paddle/fluid/dygraph/layers.py b/python/paddle/fluid/dygraph/layers.py
index cf2bd3a69bd9000ff458574ed412e3fedf9b5937..879900085d57e32a2858674799a33976ab8cab66 100644
--- a/python/paddle/fluid/dygraph/layers.py
+++ b/python/paddle/fluid/dygraph/layers.py
@@ -38,6 +38,7 @@ from paddle.fluid import framework
 from ..param_attr import ParamAttr
 from paddle.fluid.executor import Executor, global_scope
 from paddle.fluid.framework import _non_static_mode, convert_np_dtype_to_dtype_, in_dygraph_mode
+from paddle.fluid.framework import Program, program_guard
 from paddle.fluid.framework import _current_expected_place as _get_device
 from paddle.fluid.core import VarDesc
 from paddle.fluid.dygraph import no_grad
@@ -1731,6 +1732,18 @@ class Layer(object):
         self._dtype = dtype
         return self
 
+    def _startup_program(self):
+        """
+        Return the startup program containing initialization operations of all parameters.
+
+        NOTE(dev): This is a very low-level API intended only for internal developers.
+        """
+        startup_program = Program()
+        for param in self.parameters():
+            param._create_init_op(startup_program.global_block())
+
+        return startup_program
+
     # [aliases] Compatible with old method names
     set_dict = set_state_dict
     load_dict = set_state_dict
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index fad402cc980e5d73ab7f88f02108d030ed6ecf66..3a98f1c5655f5f839874f6377ebabff332e1e50d 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -6794,18 +6794,19 @@ class EagerParamBase(_core_eager_eagertensor):
         self.need_clip = kwargs.get('need_clip', True)
 
         self.is_distributed = kwargs.get('is_distributed', False)
-        # self.block = default_main_program().global_block()
-        self.init_func = None
+        # hook functions for lazy initialization
+        self._init_func = None
+        self._init_op_creator = None
 
     def set_init_func(self, obj):
-        self.init_func = obj
+        self._init_func = obj
 
     @dygraph_only
     def initialize(self):
-        assert self.init_func is not None, "Required self.init_func is not None, but received None."
-        self.init_func()
+        assert self._init_func is not None, "Required self._init_func is not None, but received None."
+        self._init_func()
         # clear function handle to release resource
-        self.init_func = None
+        self._init_func = None
 
     @property
     def trainable(self):
@@ -6820,6 +6821,13 @@ class EagerParamBase(_core_eager_eagertensor):
                 "The type of trainable MUST be bool, but the type is ",
                 type(trainable))
 
+    def _create_init_op(self, block):
+        """
+        Call the init_op_creator function to create this parameter's initializer operator in the given block.
+        """
+        assert self._init_op_creator is not None, "Required self._init_op_creator is not None, but received None."
+        self._init_op_creator(block)
+
     def __str__(self):
         """
         Convert a EagerParamBase object to a readable string.
diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py
index f2b4af165d61e5eb86d0dceb1a868cbe3baebf81..2dd2a74000bfaa41a0559119a5fd610e8957ddcc 100644
--- a/python/paddle/fluid/initializer.py
+++ b/python/paddle/fluid/initializer.py
@@ -19,7 +19,7 @@ import functools
 from . import framework
 from . import core
 from .framework import _non_static_mode, in_dygraph_mode, _in_legacy_dygraph, default_main_program, _current_expected_place
-from .lazy_init import lazy_guard
+from .lazy_init import lazy_init_helper
 from .framework import program_guard
 import numpy as np
 from .core import VarDesc
@@ -52,7 +52,7 @@ class Initializer(object):
         pass
 
     def __call__(self, param, block=None):
-        if not lazy_guard().state:
+        if not lazy_init_helper().state:
             return self.forward(param, block)
 
         return self._lazy_init(param, block)
@@ -63,18 +63,21 @@ class Initializer(object):
         raise NotImplementedError()
 
     def _lazy_init(self, param, block=None):
-        # Apply lazy initialization
+        """
+        Apply lazy initialization.
+        """
         assert in_dygraph_mode()
-        new_block = lazy_guard().startup_program.global_block()
-        new_var = param._to_static_var(True, block=new_block)
-        # Record initializer operator
-        with lazy_guard():
-            self.forward(new_var, new_block)
-        lazy_guard().enable(clear_cache=False)
+
+        def init_op_creator(forward, param, block):
+            new_var = param._to_static_var(True, block=block)
+            # Record initializer operator
+            with lazy_init_helper():
+                forward(new_var, block)
+
         # Add hook function for initializing param in dygraph mode
-        func = functools.partial(self.forward, param, block)
-        param.set_init_func(func)
+        param.set_init_func(functools.partial(self.forward, param, block))
+        param._init_op_creator = functools.partial(init_op_creator,
+                                                   self.forward, param)
+
         return param
diff --git a/python/paddle/fluid/lazy_init.py b/python/paddle/fluid/lazy_init.py
index 1e93c74c35b0cfba8ea7befe9409b9f705d711ef..8d98b1287e3e066843704aa67c2519687b909d29 100644
--- a/python/paddle/fluid/lazy_init.py
+++ b/python/paddle/fluid/lazy_init.py
@@ -14,23 +14,21 @@
 
 from . import framework
 
-__all__ = ["LazyInit"]
+__all__ = ["LazyGuard"]
 
 
-class LazyGuard(object):
+class LazyInitHelper(object):
     """
-    Guard Context to trigger switching mode between dygraph and static mode,
-    and holds the startup program resource.
+    A helper context that switches between dygraph and static mode so that
+    the initializer operators of lazily created parameters can be recorded.
     """
 
     def __init__(self):
-        self._init_program()
-
         self._state = False
         self._tracer = None
         self._in_guard = False
 
-    def enable(self, clear_cache=True):
+    def enable(self):
         """
         Switch into lazy mode.
 
@@ -42,9 +40,6 @@
         ), "LazyInit.enable() is only available in dygraph mode."
         self._state = True
 
-        if clear_cache:
-            self._init_program()
-
     def disable(self):
         """
         Exit from lazy mode.
@@ -55,15 +50,12 @@
             return
         self._state = False
 
-    def _init_program(self):
-        self.startup_program = framework.Program()
-
     def __enter__(self):
         """
         Switch into lazy mode and set _dygraph_tracer_ with None to convert dygraph mode into static mode.
""" - self.enable(clear_cache=True) + self.enable() if self._in_guard: return self._tracer = framework._dygraph_tracer_ framework._dygraph_tracer_ = None @@ -85,26 +77,22 @@ class LazyGuard(object): return self._state -_lazy_guard = LazyGuard() +_lazy_init_helper = LazyInitHelper() -def lazy_guard(): - global _lazy_guard - return _lazy_guard +def lazy_init_helper(): + global _lazy_init_helper + return _lazy_init_helper -class LazyInit(object): +class LazyGuard(object): """ - LazyInit is a wrapper interface for nn.Layer, it forwards the construct + LazyGuard is a wrapper interface for nn.Layer, it forwards the construct process of user defined Layer. Meanwhile, it provides necessary API to trigger EagerParamBase Lazy Initialization and get startup Program. """ - def __init__(self, class_obj=None): - self.class_obj = class_obj - self.clear_cache = True - - def __call__(self, *args, **kwargs): + def __enter__(self): """ Construct instance from class_obj by Lazy Initializing parameters. @@ -112,43 +100,16 @@ class LazyInit(object): .. code-block:: python - from paddle import LazyInit + from paddle import LazyGuard from paddle.nn import Linear - fc = LazyInit(Linear)(10, 10) + with LazyGuard(): + fc = LazyInit(Linear)(10, 10) for param in fc.parameters(): param.initialize() """ - assert isinstance( - self.class_obj, type - ), "Required class_obj must be a class type, but received %s." % self.class_obj - global _lazy_guard - _lazy_guard.enable(self.clear_cache) - # construct Layer instance - with framework.program_guard(framework.Program()): - instance = self.class_obj(*args, **kwargs) - _lazy_guard.disable() - # set @property dynamically to visit startup_program - instance.startup_program = _lazy_guard.startup_program - - return instance - - @staticmethod - def startup_program(): - """ - A static method to get startup program for the latest Layer. + lazy_init_helper().enable() - Examples: - - .. 
-           .. code-block:: python
-
-                from paddle import LazyInit
-                from paddle.nn import Linear
-
-                fc = LazyInit(Linear)(10, 10)
-
-                print(LazyInit.startup_program())
-
-        """
-        return _lazy_guard.startup_program
+
+    def __exit__(self, *args, **kwargs):
+        lazy_init_helper().disable()
diff --git a/python/paddle/fluid/tests/unittests/test_lazy_init.py b/python/paddle/fluid/tests/unittests/test_lazy_init.py
index 772ac78a69866a182073559589c1b2aad008d829..07a290234da877479c5f4b70ca6d16cb10a59d98 100644
--- a/python/paddle/fluid/tests/unittests/test_lazy_init.py
+++ b/python/paddle/fluid/tests/unittests/test_lazy_init.py
@@ -15,8 +15,8 @@
 import paddle
 import unittest
 import numpy as np
-from paddle import LazyInit
-from paddle.nn import Linear
+from paddle import LazyGuard
+from paddle.nn import Linear, Layer
 from paddle.nn.initializer import *
 from paddle.fluid import unique_name
 
@@ -47,11 +47,13 @@ class TestInitializerBase(unittest.TestCase):
         unique_name.dygraph_parameter_name_checker._name_set = set()
 
     def test_wrapper(self):
-        fc = LazyInit(Linear)(10,
-                              10,
-                              weight_attr=self.weight_attr,
-                              bias_attr=self.bias_attr)
-        program = fc.startup_program
+        with LazyGuard():
+            fc = Linear(10,
+                        10,
+                        weight_attr=self.weight_attr,
+                        bias_attr=self.bias_attr)
+        program = fc._startup_program()
+        print(program)
         self.check_program(program)
 
     def check_program(self, program):
@@ -64,10 +66,11 @@ class TestInitializerBase(unittest.TestCase):
 class TestDygraphLazy(TestInitializerBase):
 
     def test_wrapper(self):
-        fc = LazyInit(Linear)(10,
-                              10,
-                              weight_attr=self.weight_attr,
-                              bias_attr=self.bias_attr)
+        with LazyGuard():
+            fc = Linear(10,
+                        10,
+                        weight_attr=self.weight_attr,
+                        bias_attr=self.bias_attr)
 
         self.check_data(fc)
 
@@ -89,6 +92,66 @@ class TestDygraphLazy(TestInitializerBase):
                                    np.ones([10], dtype=np.float32) * 0.3)
 
 
+class NestModel(Layer):
+
+    def __init__(self, base_model):
+        super(NestModel, self).__init__()
+        self.base_model = base_model
+        self.fc = Linear(10, 10)
+
+    def forward(self, x):
+        x = self.base_model(x)
+        x = self.fc(x)
+        return x
+
+
+class TestNestModelLazy(TestInitializerBase):
+
+    def test_wrapper(self):
+        with LazyGuard():
+            base_model = Linear(10,
+                                10,
+                                weight_attr=self.weight_attr,
+                                bias_attr=self.bias_attr)
+            nest_model = NestModel(base_model)
+
+        self.check_data(nest_model)
+        self.check_program(nest_model)
+
+    def check_data(self, model):
+        x = paddle.randn([2, 10])
+        # weight and bias have no memory
+        with self.assertRaises(RuntimeError):
+            out = model(x)
+
+        for param in model.parameters():
+            param.initialize()
+
+        out = model(x)
+        self.assertEqual(out.shape, [2, 10])
+
+        np.testing.assert_allclose(model.base_model.weight.numpy(),
+                                   np.ones([10, 10], dtype=np.float32) * 0.6)
+        np.testing.assert_allclose(model.base_model.bias.numpy(),
+                                   np.ones([10], dtype=np.float32) * 0.3)
+
+    def check_program(self, model):
+        # verify nest_model startup_program
+        whole_program = model._startup_program()
+        self.assertEqual(whole_program.block(0).var("weight").shape, (10, 10))
+        self.assertEqual(whole_program.block(0).var("bias").shape, (10, ))
+        ops = [op.type for op in whole_program.block(0).ops]
+        init_ops = self.init_ops + ['uniform_random', 'fill_constant']
+        self.assertEqual(ops, init_ops)
+
+        # verify base_model startup_program
+        sub_program = model.base_model._startup_program()
+        self.assertEqual(sub_program.block(0).var("weight").shape, (10, 10))
+        self.assertEqual(sub_program.block(0).var("bias").shape, (10, ))
+        ops = [op.type for op in sub_program.block(0).ops]
+        self.assertEqual(ops, self.init_ops)
+
+
 class TestUniform(TestInitializerBase):
 
     def set_initializer(self):
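
For reference, a minimal end-to-end sketch of the workflow this diff introduces, distilled from test_lazy_init.py above (the layer sizes and the random input are illustrative; Layer._startup_program() is the internal helper added here, not a public API):

    import paddle
    from paddle import LazyGuard
    from paddle.nn import Linear

    # Layers built under LazyGuard create their parameters lazily:
    # no memory is allocated and no initializer op is executed yet.
    with LazyGuard():
        fc = Linear(10, 10)

    # Inspect the recorded initializer ops (developer-only helper).
    print(fc._startup_program())

    # Materialize every parameter, then use the layer as usual.
    for param in fc.parameters():
        param.initialize()

    out = fc(paddle.randn([2, 10]))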