Unverified commit 0e6bf744, authored by Aurelius84, committed by GitHub

[Dy2St] Support generating the whole program in ProgramHelper for Engine (#44827)

* [Dy2St] Support generating the whole program in ProgramHelper for Engine

* support to(mode)

* fix word typo

* fix unittest
Parent: 4ead5494
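
Taken together, the changes below let ProgramHelper build the complete static program (forward, loss, and metric ops plus backward and optimizer ops) and switch the built program between train/eval/predict modes. A minimal sketch of the intended flow, modeled on the new unit test at the bottom of this diff; the Linear model, learning rate, and spec shapes are illustrative stand-ins, and the hepler module path is spelled as it appears in this commit:

    import paddle
    from paddle.static import InputSpec
    from paddle.distributed.auto_parallel.hepler import ProgramHelper

    paddle.disable_static()
    # Illustrative model, loss, metric, and optimizer; any Layer works here.
    mlp = paddle.nn.Linear(64, 10)
    loss = paddle.nn.CrossEntropyLoss()
    metric = paddle.metric.Accuracy()
    optimizer = paddle.optimizer.SGD(learning_rate=1e-5,
                                     parameters=mlp.parameters())

    inputs = [InputSpec([4, 64], 'float32', 'x')]
    labels = [InputSpec([4], 'int64', 'label')]
    helper = ProgramHelper(mlp, loss, [metric], inputs, labels)

    paddle.enable_static()
    helper.build_program(mode='train')  # trace forward/loss/metric into a Program
    helper.build_program(mode='eval')   # programs are cached per mode
    helper.to('train')                  # switch the underlying proxy layer's mode
    helper.apply_optimizer(optimizer)   # append backward + optimizer ops
    helper.reset()                      # clear caches and clone a fresh proxy layer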
@@ -13,11 +13,14 @@
 # limitations under the License.
 
 import logging
+from collections import defaultdict
 
 from paddle.nn import Layer
 from paddle.jit import to_static, not_to_static
 from paddle.distributed.utils import get_logger
 from paddle.fluid.framework import Operator, Parameter, _non_static_mode
+from paddle.fluid.framework import program_guard
+from paddle.fluid.dygraph.dygraph_to_static.program_translator import StaticFunction
 
 from .utils import to_list
@@ -39,29 +42,30 @@ class ProxyLayer(Layer):
         self.mode = None
 
         # generated program vars
-        self.input_vars = []
-        self.label_vars = []
-        self.output_vars = []
-        self.loss_vars = []
-        self.metric_vars = []
+        self._input_vars = defaultdict(list)
+        self._label_vars = defaultdict(list)
+        self._output_vars = defaultdict(list)
+        self._loss_vars = defaultdict(list)
+        self._metric_vars = defaultdict(list)
 
     def _train(self, inputs, labels):
         """
         Train process of inner_layer with forward/loss/metric logic.
         """
         # step 1. save feed variables of Program
-        self.input_vars = inputs
-        self.label_vars = labels
+        mode = 'train'
+        self._input_vars[mode] = inputs
+        self._label_vars[mode] = labels
 
         # step 2. call inner_layer.forward
-        self.output_vars = self.inner_layer(*inputs)
+        self._output_vars[mode] = self.inner_layer(*inputs)
 
         # step 3. calculate loss if needed
         new_inputs = self._prepare(self.output_vars, labels)
-        self.loss_vars = self.call_loss(new_inputs)
+        self._loss_vars[mode] = self.call_loss(new_inputs)
 
         # step 4. calculate metrics if needed
-        self.metric_vars = self.call_metrics(new_inputs)
+        self._metric_vars[mode] = self.call_metrics(new_inputs)
 
     def _eval(self, inputs, labels):
         """
@@ -71,28 +75,30 @@ class ProxyLayer(Layer):
         # sure if they can.
 
         # step 1. save feed variables of Program
-        self.input_vars = inputs
-        self.label_vars = labels
+        mode = 'eval'
+        self._input_vars[mode] = inputs
+        self._label_vars[mode] = labels
 
         # step 2. call inner_layer.forward
-        self.output_vars = self.inner_layer(*inputs)
+        self._output_vars[mode] = self.inner_layer(*inputs)
 
         # step 3. calculate loss if needed
         new_inputs = self._prepare(self.output_vars, labels)
-        self.loss_vars = self.call_loss(new_inputs)
+        self._loss_vars[mode] = self.call_loss(new_inputs)
 
         # step 4. calculate metrics if needed
-        self.metric_vars = self.call_metrics(new_inputs)
+        self._metric_vars[mode] = self.call_metrics(new_inputs)
 
     def _predict(self, inputs):
         """
         Predict process of inner_layer with forward logic.
         """
         # step 1. save feed variables of Program
-        self.input_vars = inputs
+        mode = 'predict'
+        self._input_vars[mode] = inputs
 
         # step 2. call inner_layer.forward
-        self.output_vars = self.inner_layer(*inputs)
+        self._output_vars[mode] = self.inner_layer(*inputs)
 
     @not_to_static
     def _prepare(self, outputs, labels):
@@ -136,15 +142,46 @@ class ProxyLayer(Layer):
         self.mode = mode
         self.training = mode == 'train'
 
+    def clone(self):
+        return ProxyLayer(self.inner_layer, self.loss_func, self.metrics)
+
+    @property
+    def input_vars(self):
+        return self._input_vars[self.mode]
+
+    @property
+    def label_vars(self):
+        return self._label_vars[self.mode]
+
+    @property
+    def output_vars(self):
+        return self._output_vars[self.mode]
+
+    @property
+    def loss_vars(self):
+        return self._loss_vars[self.mode]
+
+    @property
+    def metric_vars(self):
+        return self._metric_vars[self.mode]
+
 
 class BuildInfo:
 
-    def __init__(self, mode=None, state=False):
-        self.mode = mode
-        self.state = state
+    def __init__(self):
+        self.clear()
+
+    def has_cache(self, mode, update=False):
+        is_cache = self.states[mode]
+        if update:
+            self.cache(mode)
+        return is_cache
 
-    def has_cache(self, mode):
-        return self.mode == mode and self.state is True
+    def cache(self, mode):
+        self.states[mode] = True
+
+    def clear(self):
+        self.states = defaultdict(bool)
 
 
 class ProgramHelper(object):
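
The reworked BuildInfo above keeps one cache flag per mode, and has_cache(mode, update=True) reports the previous state while marking the mode as cached in the same call. A quick sketch of that contract, assuming the class exactly as added above:

    info = BuildInfo()
    assert info.has_cache('train', update=True) is False  # nothing cached yet; marks 'train'
    assert info.has_cache('train') is True                # later lookups hit the cache
    info.clear()                                          # ProgramHelper.reset() relies on this
    assert info.has_cache('train') is False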
@@ -163,20 +200,27 @@ class ProgramHelper(object):
         self.build_info = BuildInfo()
         self._logger = get_logger(logging.INFO)
 
+    def reset(self):
+        """
+        Reset all state of the current object.
+        """
+        self.build_info.clear()
+        self.proxy_layer = self.proxy_layer.clone()
+
     def build_program(self, mode):
         """
         Convert dygraph model into static Program IR.
         """
         assert mode in ['train', 'eval', 'predict']
+        self.proxy_layer.set_mode(mode)
         # skip if we have already built the program.
-        if self.build_info.has_cache(mode):
+        if self.build_info.has_cache(mode, True):
             self._logger.info(
                 "Already build program with mode = %s, use cached program." %
                 mode)
             return
 
         self._logger.info("start to build program for mode = %s." % mode)
-        self.proxy_layer.mode = mode
         input_spec = [self.inputs_spec, self.labels_spec
                       ] if mode != 'predict' else [self.inputs_spec]
         static_func = to_static(self.static_func(), input_spec=input_spec)
@@ -188,6 +232,8 @@ class ProgramHelper(object):
         # generating Program IR immediately.
         getattr(self.proxy_layer, func_name).concrete_program
 
+        self._build_startup_program()
+
     def _build_startup_program(self):
         """
         Create and Sync parameters into startup program.
@@ -201,9 +247,46 @@ class ProgramHelper(object):
                 stop_gradient=param.stop_gradient,
                 block=self.startup_program.global_block())
 
+    def apply_optimizer(self, optimizer):
+        """
+        Append backward and generate optimizer operations.
+        """
+        self._verify_optimizer(optimizer)
+        self._logger.info("start to apply optimizer: %s ",
+                          type(optimizer).__name__)
+        # clear optimizer parameters
+        original_params = optimizer._parameter_list
+        optimizer._parameter_list = None
+        with program_guard(self.main_program, self.startup_program):
+            res = optimizer.minimize(self.loss_vars[0])
+
+        # restore optimizer parameters
+        optimizer._parameter_list = original_params
+        return res
+
+    def _verify_optimizer(self, optimizer):
+        assert optimizer is not None
+        assert hasattr(optimizer,
+                       "minimize"), "Optimizer must have minimize() method."
+        assert self.proxy_layer.mode == 'train', "Required mode == 'train', but received '%s'" % self.proxy_layer.mode
+        assert len(
+            self.loss_vars
+        ) == 1, "Required len(loss_vars) == 1, but received len(loss_vars) = %s" % len(
+            self.loss_vars)
+
+    def to(self, mode):
+        """
+        Switch the underlying proxy layer into the target mode.
+        """
+        assert mode in ['train', 'eval', 'predict']
+        func = getattr(self.proxy_layer, '_' + mode)
+        assert isinstance(
+            func, StaticFunction), "Please call build_program(mode) firstly."
+        self.proxy_layer.set_mode(mode)
+
     def static_func(self):
         """
-        Return target mode function.
+        Return the StaticFunction instance for the underlying target mode.
         """
         assert self.proxy_layer.mode in [
             'train', 'eval', 'predict'
...
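
One non-obvious step in apply_optimizer above: the optimizer was constructed against dygraph parameters, which are not the Parameter objects of the freshly traced static program, so _parameter_list is cleared before minimize() runs (presumably so that it falls back to collecting trainable parameters from the program itself) and restored afterwards. The same save/restore pattern packaged as a context manager, as my own wrapping rather than part of this commit:

    from contextlib import contextmanager

    @contextmanager
    def swapped_parameter_list(optimizer, value=None):
        # Temporarily replace the optimizer's captured parameter list and
        # restore it even if minimize() raises inside the block.
        original = optimizer._parameter_list
        optimizer._parameter_list = value
        try:
            yield optimizer
        finally:
            optimizer._parameter_list = original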
@@ -27,6 +27,7 @@ from paddle.io import Dataset
 from paddle.static import InputSpec
 from paddle.fluid.framework import _non_static_mode
 from paddle.distributed.auto_parallel.engine import Engine
+from paddle.distributed.auto_parallel.hepler import ProgramHelper
 
 batch_size = 4
 batch_num = 30
@@ -85,6 +86,45 @@ class MLPLayer(nn.Layer):
         return out
 
 
+class TestWholeProgram(unittest.TestCase):
+
+    def test_apply_optimizer(self):
+        paddle.disable_static()
+        mlp = MLPLayer(hidden_size=hidden_size,
+                       intermediate_size=4 * hidden_size,
+                       dropout_ratio=0.1,
+                       initializer_range=0.02)
+        metrics = paddle.metric.Accuracy()
+        loss = paddle.nn.CrossEntropyLoss()
+        optimizer = paddle.optimizer.SGD(learning_rate=0.00001,
+                                         parameters=mlp.parameters())
+        inputs = InputSpec([batch_size, hidden_size], 'float32', 'x')
+        labels = InputSpec([batch_size], 'int64', 'label')
+
+        program_helper = ProgramHelper(mlp, loss, [metrics], [inputs],
+                                       [labels])
+
+        paddle.enable_static()
+        # step 1: build programs for both modes
+        program_helper.build_program(mode='train')
+        program_helper.build_program(mode='eval')
+        # switching between built programs is now a one-liner
+        program_helper.to('train')
+        forward_ops = program_helper.main_program.block(0).ops
+        self.assertEqual(len(forward_ops), 21)
+
+        # step 2: apply the optimizer to generate the whole program
+        optimize_ops, _ = program_helper.apply_optimizer(optimizer)
+        all_ops = program_helper.main_program.block(0).ops
+        sgd_ops = [
+            op for op in program_helper.main_program.block(0).ops
+            if op.type == 'sgd'
+        ]
+        self.assertEqual(len(all_ops), 41)
+        self.assertEqual(len(optimize_ops), len(sgd_ops))
+
+        program_helper.reset()
+
+
 class TestToStatic(unittest.TestCase):
 
     def test_to_static(self):
...