未验证 提交 5406b014 编写于 作者: C Chen Weihang 提交者: GitHub

Refine jit.save implement to adapt InputSpec using cases (#26959)

* add some unittest cases ot verify jit.save, no_test

* add more unittests

* add test with example inputs

* polish implement details

* remove useless blank

* fix fetch random error
上级 07d089f6
...@@ -18,6 +18,7 @@ import os ...@@ -18,6 +18,7 @@ import os
import pickle import pickle
import warnings import warnings
import functools import functools
from collections import OrderedDict
import six import six
import paddle import paddle
...@@ -633,6 +634,73 @@ class SaveLoadConfig(object): ...@@ -633,6 +634,73 @@ class SaveLoadConfig(object):
self._keep_name_table = value self._keep_name_table = value
def _get_input_var_names(inputs, input_spec):
name_none_error = "The %s's name is None. " \
"When using jit.save, please set InputSepc's name in " \
"to_static(input_spec=[]) and jit.save(input_spec=[]) " \
"and make sure they are consistent."
name_no_exists_error = "The tensor `%s` does not exists. " \
"Please make sure the name of InputSpec or example Tensor " \
"in input_spec is the same as the name of InputSpec in " \
"`to_static` decorated on the Layer.forward method."
result_list = []
input_var_names = [var.name for var in inputs if isinstance(var, Variable)]
if input_spec is None:
# no prune
result_list = input_var_names
elif input_spec is not None and len(input_spec) == len(input_var_names):
# no prune
result_list = input_var_names
# if input spec name not in input_var_names, only raise warning
for spec in input_spec:
if spec.name is None:
warnings.warn(name_none_error % spec)
elif spec.name not in input_var_names:
warnings.warn(name_no_exists_error % spec.name)
else:
# do nothing
pass
else:
# prune
for spec in input_spec:
if spec.name is None:
# name is None, the input_spec only can be InputSpec
raise ValueError(name_none_error % spec)
elif spec.name not in input_var_names:
# the input_spec can be `InputSpec` or `VarBase`
raise ValueError(name_no_exists_error % spec.name)
else:
result_list.append(spec.name)
return result_list
def _get_output_vars(outputs, output_spec):
name_no_exists_error = "The tensor `%s` does not exists. " \
"Please make sure the name of example Tensor " \
"in configs.output_spec is the output tensor of " \
"Layer.forward method."
result_list = []
output_vars_dict = OrderedDict()
for var in outputs:
if isinstance(var, Variable):
output_vars_dict[var.name] = var
if output_spec is None:
result_list = output_vars_dict.values()
elif output_spec is not None and len(output_spec) == len(output_vars_dict):
result_list = output_vars_dict.values()
for var in output_spec:
if var.name not in output_vars_dict:
warnings.warn(name_no_exists_error % var.name)
else:
for var in output_spec:
if var.name not in output_vars_dict:
raise ValueError(name_no_exists_error % var.name)
else:
result_list.append(output_vars_dict[var.name])
return result_list
# NOTE(chenweihang): change jit.save/load argument `configs` to `config` # NOTE(chenweihang): change jit.save/load argument `configs` to `config`
def deprecate_save_load_configs(func): def deprecate_save_load_configs(func):
@functools.wraps(func) @functools.wraps(func)
...@@ -753,26 +821,6 @@ def save(layer, model_path, input_spec=None, config=None): ...@@ -753,26 +821,6 @@ def save(layer, model_path, input_spec=None, config=None):
paddle.jit.save(layer, model_path) paddle.jit.save(layer, model_path)
""" """
def get_inout_spec(all_vars, target_vars, return_name=False):
result_list = []
valid_var_dict = {}
valid_vars = [var for var in all_vars if isinstance(var, Variable)]
for var in valid_vars:
valid_var_dict[var.name] = var
if target_vars:
for i, var in enumerate(target_vars):
# check target var whether exists
if var.name not in valid_var_dict:
raise RuntimeError(
"The variable to feed/fetch are not exist.")
result_list.append(valid_var_dict[var.name])
else:
result_list = valid_vars
if return_name:
result_list = [var.name for var in result_list]
return result_list
# 1. input check # 1. input check
prog_translator = ProgramTranslator() prog_translator = ProgramTranslator()
if not prog_translator.enable: if not prog_translator.enable:
...@@ -788,25 +836,58 @@ def save(layer, model_path, input_spec=None, config=None): ...@@ -788,25 +836,58 @@ def save(layer, model_path, input_spec=None, config=None):
if configs is None: if configs is None:
configs = SaveLoadConfig() configs = SaveLoadConfig()
# avoid change user given input_spec
inner_input_spec = None
if input_spec is not None: if input_spec is not None:
if not isinstance(input_spec, list): if not isinstance(input_spec, list):
raise TypeError( raise TypeError(
"The input input_spec should be 'list', but received input_spec's type is %s." "The input input_spec should be 'list', but received input_spec's type is %s."
% type(input_spec)) % type(input_spec))
inner_input_spec = []
for var in input_spec: for var in input_spec:
if not isinstance(var, (core.VarBase, Variable, if isinstance(var, paddle.static.InputSpec):
paddle.static.InputSpec)): inner_input_spec.append(var)
elif isinstance(var, (core.VarBase, Variable)):
inner_input_spec.append(
paddle.static.InputSpec.from_tensor(var))
else:
raise TypeError( raise TypeError(
"The element in input_spec list should be 'Variable' or `paddle.static.InputSpec`, but received element's type is %s." "The element in input_spec list should be 'Variable' or `paddle.static.InputSpec`, but received element's type is %s."
% type(var)) % type(var))
# 2. get program of declarative Layer.forward # 2. get program from Layer
if not isinstance(layer.forward, StaticLayer): # TODO(chenweihang): add support for other method, not only forward
raise RuntimeError( if isinstance(layer.forward, StaticLayer):
"layer.forward need to be decorated by `@declarative`.")
concrete_program = layer.forward.concrete_program concrete_program = layer.forward.concrete_program
else:
# NOTE: we maintain the mapping of variable name to # transform in jit.save, if input_spec is incomplete, declarative will throw error
static_forward = declarative(layer.forward, input_spec=inner_input_spec)
concrete_program = static_forward.concrete_program
# the input_spec has been used in declarative, which is equal to
# @declarative with input_spec and jit.save without input_spec,
# avoid needless warning
inner_input_spec = None
# 3. build input & output of save_infernece_model
# NOTE(chenweihang): [ Get input variables name ]
# There are two cases, whether to prune the inputs or not
# - not prune inputs (recommend):
# - the len(input_spec) == len((concrete_program.inputs) - 1
# - here can use concrete_program.inputs directly
# - prune inputs:
# - the input_spec length < len((concrete_program.inputs) - 1
# - the input_spec's name should be in concrete_program.inputs
input_var_names = _get_input_var_names(concrete_program.inputs,
inner_input_spec)
# NOTE(chenweihang): [ Get output variables ]
# the rule is like [ Get input variables name ]. For output var,
# we only support VarBase spec, and actually, we only need the
# var name of output, and we don't recommended to use output_spec
output_vars = _get_output_vars(concrete_program.outputs,
configs.output_spec)
# NOTE(chenweihang): we maintain the mapping of variable name to
# structured name, the buffer variable (non-persistable) # structured name, the buffer variable (non-persistable)
# saved to inference program may not need by dygraph Layer, # saved to inference program may not need by dygraph Layer,
# we only record the state_dict variable's structured name # we only record the state_dict variable's structured name
...@@ -814,7 +895,7 @@ def save(layer, model_path, input_spec=None, config=None): ...@@ -814,7 +895,7 @@ def save(layer, model_path, input_spec=None, config=None):
for structured_name, var in six.iteritems(layer.state_dict()): for structured_name, var in six.iteritems(layer.state_dict()):
state_names_dict[var.name] = structured_name state_names_dict[var.name] = structured_name
# 3. share parameters from Layer to scope & record var info # 4. share parameters from Layer to scope & record var info
scope = core.Scope() scope = core.Scope()
extra_var_info = dict() extra_var_info = dict()
for param_or_buffer in concrete_program.parameters: for param_or_buffer in concrete_program.parameters:
...@@ -832,10 +913,6 @@ def save(layer, model_path, input_spec=None, config=None): ...@@ -832,10 +913,6 @@ def save(layer, model_path, input_spec=None, config=None):
extra_info_dict['trainable'] = param_or_buffer.trainable extra_info_dict['trainable'] = param_or_buffer.trainable
extra_var_info[param_or_buffer.name] = extra_info_dict extra_var_info[param_or_buffer.name] = extra_info_dict
# 4. build input & output spec
input_var_names = get_inout_spec(concrete_program.inputs, input_spec, True)
output_vars = get_inout_spec(concrete_program.outputs, configs.output_spec)
# 5. save inference model # 5. save inference model
from paddle.fluid.io import save_inference_model from paddle.fluid.io import save_inference_model
...@@ -856,7 +933,7 @@ def save(layer, model_path, input_spec=None, config=None): ...@@ -856,7 +933,7 @@ def save(layer, model_path, input_spec=None, config=None):
export_for_deployment=configs._export_for_deployment, export_for_deployment=configs._export_for_deployment,
program_only=configs._program_only) program_only=configs._program_only)
# NOTE: [ Save extra variable info ] # NOTE(chenweihang): [ Save extra variable info ]
# save_inference_model will lose some important variable information, including: # save_inference_model will lose some important variable information, including:
# - Variable name and correspondence (when saved variables as one file) # - Variable name and correspondence (when saved variables as one file)
# - Variable.stop_gradient information # - Variable.stop_gradient information
......
...@@ -56,6 +56,16 @@ class LinearNet(fluid.dygraph.Layer): ...@@ -56,6 +56,16 @@ class LinearNet(fluid.dygraph.Layer):
return self._linear(x) return self._linear(x)
class LinearNetWithInputSpec(fluid.dygraph.Layer):
    # Single linear layer whose forward is to_static-decorated with an
    # InputSpec, so jit.save works without running forward first.
    def __init__(self, in_size, out_size):
        super(LinearNetWithInputSpec, self).__init__()
        self._linear = Linear(in_size, out_size)
    # NOTE: the InputSpec deliberately has no name — exercised by the
    # input-spec name warning test cases below.
    @declarative(input_spec=[InputSpec(shape=[None, 784], dtype='float32')])
    def forward(self, x):
        return self._linear(x)
class LinearNetNotDeclarative(fluid.dygraph.Layer): class LinearNetNotDeclarative(fluid.dygraph.Layer):
def __init__(self, in_size, out_size): def __init__(self, in_size, out_size):
super(LinearNetNotDeclarative, self).__init__() super(LinearNetNotDeclarative, self).__init__()
...@@ -65,6 +75,23 @@ class LinearNetNotDeclarative(fluid.dygraph.Layer): ...@@ -65,6 +75,23 @@ class LinearNetNotDeclarative(fluid.dygraph.Layer):
return self._linear(x) return self._linear(x)
class LinerNetWithLabel(paddle.nn.Layer):
    # Layer whose forward takes (image, label) and returns (out, avg_loss);
    # used by the input/output pruning tests. Both specs are named so the
    # "image"-only pruned save can match inputs by name.
    def __init__(self, in_size, out_size):
        super(LinerNetWithLabel, self).__init__()
        self._linear = Linear(in_size, out_size)
    @declarative(input_spec=[
        InputSpec(
            shape=[None, 784], dtype='float32', name="image"), InputSpec(
                shape=[None, 1], dtype='int64', name="label")
    ])
    def forward(self, x, label):
        out = self._linear(x)
        loss = fluid.layers.cross_entropy(out, label)
        avg_loss = fluid.layers.mean(loss)
        return out, avg_loss
class LinearNetReturnLoss(fluid.dygraph.Layer): class LinearNetReturnLoss(fluid.dygraph.Layer):
def __init__(self, in_size, out_size): def __init__(self, in_size, out_size):
super(LinearNetReturnLoss, self).__init__() super(LinearNetReturnLoss, self).__init__()
...@@ -78,6 +105,54 @@ class LinearNetReturnLoss(fluid.dygraph.Layer): ...@@ -78,6 +105,54 @@ class LinearNetReturnLoss(fluid.dygraph.Layer):
return z, loss return z, loss
class LinearNetMultiInput(fluid.dygraph.Layer):
    # Layer with two tensor inputs and three outputs (x_out, y_out, loss);
    # each input goes through its own linear sublayer.
    def __init__(self, in_size, out_size):
        super(LinearNetMultiInput, self).__init__()
        self._linear1 = Linear(in_size, out_size)
        self._linear2 = Linear(in_size, out_size)
    # NOTE(review): these specs are unnamed — presumably the multi-input
    # save/load tests rely on default names; confirm against the callers.
    @declarative(input_spec=[
        InputSpec(
            [None, 8], dtype='float32'), InputSpec(
                [None, 8], dtype='float32')
    ])
    def forward(self, x, y):
        x_out = self._linear1(x)
        y_out = self._linear2(y)
        loss = fluid.layers.mean(x_out + y_out)
        return x_out, y_out, loss
class MultiLoadingLinearNet(fluid.dygraph.Layer):
    # Layer that loads the same saved model twice as sublayers, to verify
    # that repeated jit.load of one model_path composes correctly.
    def __init__(self, size, model_path):
        super(MultiLoadingLinearNet, self).__init__()
        self._linear = Linear(size, size)
        self._load_linear1 = fluid.dygraph.jit.load(model_path)
        self._load_linear2 = fluid.dygraph.jit.load(model_path)
    @declarative
    def forward(self, x):
        # chain: own linear -> loaded copy 1 -> loaded copy 2 -> own linear
        tmp1 = self._linear(x)
        tmp2 = self._load_linear1(tmp1)
        tmp3 = self._load_linear2(tmp2)
        y = self._linear(tmp3)
        return y
class LinearNetReturnHidden(fluid.dygraph.Layer):
    # Layer returning both a hidden tensor and the loss, used by the
    # prune-and-load tests (only part of the outputs is saved).
    def __init__(self, in_size, out_size):
        super(LinearNetReturnHidden, self).__init__()
        self._linear_1 = Linear(in_size, out_size)
        self._linear_2 = Linear(in_size, out_size)
    @declarative
    def forward(self, x):
        y = self._linear_1(x)
        z = self._linear_2(y)
        loss = fluid.layers.mean(z)
        return y, loss
def train(layer, input_size=784, label_size=1): def train(layer, input_size=784, label_size=1):
# create optimizer # create optimizer
sgd = fluid.optimizer.SGDOptimizer( sgd = fluid.optimizer.SGDOptimizer(
...@@ -102,6 +177,27 @@ def train(layer, input_size=784, label_size=1): ...@@ -102,6 +177,27 @@ def train(layer, input_size=784, label_size=1):
return [img], layer, avg_loss return [img], layer, avg_loss
def train_with_label(layer, input_size=784, label_size=1):
    """Train a layer whose forward takes (image, label) over one random
    epoch and return the prediction tensor of the last step."""
    optimizer = fluid.optimizer.SGDOptimizer(
        learning_rate=0.01, parameter_list=layer.parameters())
    # create data loader over the random batch generator
    loader = fluid.io.DataLoader.from_generator(capacity=5)
    loader.set_batch_generator(random_batch_reader(input_size, label_size))
    # train
    for img, label in loader():
        label.stop_gradient = True
        out, avg_loss = layer(img, label)
        avg_loss.backward()
        optimizer.minimize(avg_loss)
        layer.clear_gradients()
    return out
class TestJitSaveLoad(unittest.TestCase): class TestJitSaveLoad(unittest.TestCase):
def setUp(self): def setUp(self):
self.model_path = "model.test_jit_save_load" self.model_path = "model.test_jit_save_load"
...@@ -168,15 +264,6 @@ class TestJitSaveLoad(unittest.TestCase): ...@@ -168,15 +264,6 @@ class TestJitSaveLoad(unittest.TestCase):
self.assertTrue( self.assertTrue(
np.array_equal(train_layer(x).numpy(), new_layer(x).numpy())) np.array_equal(train_layer(x).numpy(), new_layer(x).numpy()))
def test_save_get_program_failed(self):
layer = LinearNetNotDeclarative(784, 1)
example_inputs, layer, _ = train(layer)
with self.assertRaises(RuntimeError):
fluid.dygraph.jit.save(
layer=layer,
model_path=self.model_path,
input_spec=example_inputs)
def test_load_dygraph_no_path(self): def test_load_dygraph_no_path(self):
model_path = "model.test_jit_save_load.no_path" model_path = "model.test_jit_save_load.no_path"
new_layer = LinearNet(784, 1) new_layer = LinearNet(784, 1)
...@@ -184,24 +271,6 @@ class TestJitSaveLoad(unittest.TestCase): ...@@ -184,24 +271,6 @@ class TestJitSaveLoad(unittest.TestCase):
model_dict, _ = fluid.dygraph.load_dygraph(model_path) model_dict, _ = fluid.dygraph.load_dygraph(model_path)
class LinearNetMultiInput(fluid.dygraph.Layer):
def __init__(self, in_size, out_size):
super(LinearNetMultiInput, self).__init__()
self._linear1 = Linear(in_size, out_size)
# self._linear2 = Linear(in_size, out_size)
@declarative(input_spec=[
InputSpec(
[None, 8], dtype='float32'), InputSpec(
[None, 8], dtype='float32')
])
def forward(self, x, y):
x_out = self._linear1(x)
y_out = self._linear1(y)
loss = fluid.layers.mean(x_out + y_out)
return x_out, y_out, loss
class TestSaveLoadWithInputSpec(unittest.TestCase): class TestSaveLoadWithInputSpec(unittest.TestCase):
def setUp(self): def setUp(self):
# enable dygraph mode # enable dygraph mode
...@@ -345,22 +414,6 @@ class TestJitSaveLoadConfig(unittest.TestCase): ...@@ -345,22 +414,6 @@ class TestJitSaveLoadConfig(unittest.TestCase):
np.array_equal(train_layer(x)[0].numpy(), infer_layer(x).numpy())) np.array_equal(train_layer(x)[0].numpy(), infer_layer(x).numpy()))
class MultiLoadingLinearNet(fluid.dygraph.Layer):
def __init__(self, size, model_path):
super(MultiLoadingLinearNet, self).__init__()
self._linear = Linear(size, size)
self._load_linear1 = fluid.dygraph.jit.load(model_path)
self._load_linear2 = fluid.dygraph.jit.load(model_path)
@declarative
def forward(self, x):
tmp1 = self._linear(x)
tmp2 = self._load_linear1(tmp1)
tmp3 = self._load_linear2(tmp2)
y = self._linear(tmp3)
return y
class TestJitMultipleLoading(unittest.TestCase): class TestJitMultipleLoading(unittest.TestCase):
def setUp(self): def setUp(self):
self.linear_size = 4 self.linear_size = 4
...@@ -389,20 +442,6 @@ class TestJitMultipleLoading(unittest.TestCase): ...@@ -389,20 +442,6 @@ class TestJitMultipleLoading(unittest.TestCase):
name_set.add(var.name) name_set.add(var.name)
class LinearNetReturnHidden(fluid.dygraph.Layer):
def __init__(self, in_size, out_size):
super(LinearNetReturnHidden, self).__init__()
self._linear_1 = Linear(in_size, out_size)
self._linear_2 = Linear(in_size, out_size)
@declarative
def forward(self, x):
y = self._linear_1(x)
z = self._linear_2(y)
loss = fluid.layers.mean(z)
return y, loss
class TestJitPruneModelAndLoad(unittest.TestCase): class TestJitPruneModelAndLoad(unittest.TestCase):
def setUp(self): def setUp(self):
self.linear_size = 4 self.linear_size = 4
...@@ -461,5 +500,197 @@ class TestJitPruneModelAndLoad(unittest.TestCase): ...@@ -461,5 +500,197 @@ class TestJitPruneModelAndLoad(unittest.TestCase):
fluid.dygraph.jit.load(self.model_path) fluid.dygraph.jit.load(self.model_path)
class TestJitSaveMultiCases(unittest.TestCase):
    """Cover the paddle.jit.save use cases: with/without to_static
    decoration, with/without prior training, and with input or output
    pruning — including the warning and error branches."""

    def setUp(self):
        # enable dygraph mode
        fluid.enable_dygraph()
        # config seed so layer parameters are reproducible
        paddle.manual_seed(SEED)
        paddle.framework.random._manual_program_seed(SEED)

    def verify_inference_correctness(self, layer, model_path, with_label=False):
        """Load the saved model and check its output matches the dygraph layer."""
        layer.eval()
        loaded_layer = paddle.jit.load(model_path)
        loaded_layer.eval()
        # inference & compare
        x = paddle.to_variable(np.random.random((1, 784)).astype('float32'))
        if with_label:
            # the label input is pruned at save time, so only the original
            # layer receives it; the loaded layer takes the image alone
            y = paddle.to_variable(np.random.random((1, 1)).astype('int64'))
            pred, _ = layer(x, y)
            pred = pred.numpy()
        else:
            pred = layer(x).numpy()
        loaded_pred = loaded_layer(x).numpy()
        self.assertTrue(
            np.array_equal(pred, loaded_pred),
            msg="Result diff when load and inference:\nlayer result:\n{}\n" \
                "loaded layer result:\n{}".format(pred, loaded_pred))

    def test_no_prune_to_static_after_train(self):
        layer = LinearNet(784, 1)
        train(layer)
        model_path = "test_no_prune_to_static_after_train"
        paddle.jit.save(layer, model_path)
        self.verify_inference_correctness(layer, model_path)

    def test_no_prune_to_static_no_train(self):
        # forward already decorated with InputSpec, so no train is needed
        layer = LinearNetWithInputSpec(784, 1)
        model_path = "test_no_prune_to_static_no_train"
        paddle.jit.save(layer, model_path)
        self.verify_inference_correctness(layer, model_path)

    def test_no_prune_no_to_static_after_train(self):
        layer = LinearNetNotDeclarative(784, 1)
        train(layer)
        model_path = "test_no_prune_no_to_static_after_train"
        paddle.jit.save(
            layer,
            model_path,
            input_spec=[InputSpec(
                shape=[None, 784], dtype='float32')])
        self.verify_inference_correctness(layer, model_path)

    def test_no_prune_no_to_static_after_train_with_examples(self):
        layer = LinearNetNotDeclarative(784, 1)
        # example tensors from training serve as the input_spec
        example_inputs, _, _ = train(layer)
        model_path = "test_no_prune_no_to_static_after_train_with_examples"
        fluid.dygraph.jit.save(
            layer=layer, model_path=model_path, input_spec=example_inputs)
        self.verify_inference_correctness(layer, model_path)

    def test_no_prune_no_to_static_no_train(self):
        layer = LinearNetNotDeclarative(784, 1)
        model_path = "test_no_prune_no_to_static_no_train"
        paddle.jit.save(
            layer,
            model_path,
            input_spec=[InputSpec(
                shape=[None, 784], dtype='float32')])
        self.verify_inference_correctness(layer, model_path)

    def test_prune_to_static_after_train(self):
        layer = LinerNetWithLabel(784, 1)
        out = train_with_label(layer)
        model_path = "test_prune_to_static_after_train"
        configs = paddle.SaveLoadConfig()
        # prune outputs down to the prediction tensor only
        configs.output_spec = [out]
        paddle.jit.save(
            layer,
            model_path,
            input_spec=[
                InputSpec(
                    shape=[None, 784], dtype='float32', name="image")
            ],
            configs=configs)
        self.verify_inference_correctness(layer, model_path, True)

    def test_prune_to_static_no_train(self):
        layer = LinerNetWithLabel(784, 1)
        model_path = "test_prune_to_static_no_train"
        configs = paddle.SaveLoadConfig()
        # TODO: no train, cannot get output_spec var here
        # now only can use index
        configs.output_spec = layer.forward.outputs[:1]
        paddle.jit.save(
            layer,
            model_path,
            input_spec=[
                InputSpec(
                    shape=[None, 784], dtype='float32', name="image")
            ],
            configs=configs)
        self.verify_inference_correctness(layer, model_path, True)

    def test_no_prune_input_spec_name_warning(self):
        layer = LinearNetWithInputSpec(784, 1)
        train(layer)
        model_path = "test_no_prune_input_spec_name_warning"
        # spec without a name: save only warns, it does not fail
        paddle.jit.save(
            layer,
            model_path,
            input_spec=[InputSpec(
                shape=[None, 784], dtype='float32')])
        # spec with a mismatched name: also only a warning when not pruning
        paddle.jit.save(
            layer,
            model_path,
            input_spec=[
                InputSpec(
                    shape=[None, 784], dtype='float32', name='feed_input')
            ])
        self.verify_inference_correctness(layer, model_path)

    def test_not_prune_output_spec_name_warning(self):
        layer = LinearNet(784, 1)
        train(layer)
        model_path = "test_not_prune_output_spec_name_warning"
        configs = paddle.SaveLoadConfig()
        # an unrelated tensor in output_spec: only a warning when not pruning
        out = paddle.to_variable(np.random.random((1, 1)).astype('float'))
        configs.output_spec = [out]
        paddle.jit.save(layer, model_path, configs=configs)
        self.verify_inference_correctness(layer, model_path)

    def test_prune_input_spec_name_error(self):
        layer = LinerNetWithLabel(784, 1)
        model_path = "test_prune_input_spec_name_error"
        # pruning inputs requires names matching the forward InputSpec
        with self.assertRaises(ValueError):
            paddle.jit.save(
                layer,
                model_path,
                input_spec=[InputSpec(
                    shape=[None, 784], dtype='float32')])
        with self.assertRaises(ValueError):
            paddle.jit.save(
                layer,
                model_path,
                input_spec=[
                    InputSpec(
                        shape=[None, 784], dtype='float32', name='feed_input')
                ])

    def test_prune_output_spec_name_error(self):
        layer = LinerNetWithLabel(784, 1)
        train_with_label(layer)
        # NOTE: use this test's own path; the original reused
        # "test_prune_to_static_after_train", which could collide with
        # that test's saved model directory
        model_path = "test_prune_output_spec_name_error"
        configs = paddle.SaveLoadConfig()
        out = paddle.to_variable(np.random.random((1, 1)).astype('float'))
        configs.output_spec = [out]
        # pruning outputs requires tensors actually produced by forward
        with self.assertRaises(ValueError):
            paddle.jit.save(
                layer,
                model_path,
                input_spec=[
                    InputSpec(
                        shape=[None, 784], dtype='float32', name="image")
                ],
                configs=configs)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册