from collections import OrderedDict


def _get_input_var_names(inputs, input_spec):
    """Select the feed variable names passed on to ``save_inference_model``.

    Args:
        inputs: Iterable of program inputs. Only ``Variable`` instances are
            considered; every other element is ignored.
        input_spec: ``None`` or a list of objects exposing a ``name``
            attribute.

    Returns:
        list[str]: All input variable names when ``input_spec`` is ``None``
        or has exactly as many entries as there are input variables (no
        pruning). Otherwise only the names listed in ``input_spec``, in the
        order of ``input_spec``.

    Raises:
        ValueError: Only in the pruning case, when a spec has no name or
            names a variable that is not among ``inputs``.
    """
    name_none_error = (
        "The %s's name is None. "
        "When using jit.save, please set InputSpec's name in "
        "to_static(input_spec=[]) and jit.save(input_spec=[]) "
        "and make sure they are consistent.")
    name_no_exists_error = (
        "The tensor `%s` does not exists. "
        "Please make sure the name of InputSpec or example Tensor "
        "in input_spec is the same as the name of InputSpec in "
        "`to_static` decorated on the Layer.forward method.")

    input_var_names = [var.name for var in inputs if isinstance(var, Variable)]

    if input_spec is None:
        # no prune: nothing to validate
        return input_var_names

    if len(input_spec) == len(input_var_names):
        # no prune: the spec count matches, so every input is kept and a
        # missing or unknown spec name is only worth a warning
        for spec in input_spec:
            if spec.name is None:
                warnings.warn(name_none_error % spec)
            elif spec.name not in input_var_names:
                warnings.warn(name_no_exists_error % spec.name)
        return input_var_names

    # prune: the selection is driven by spec names, so each one must resolve
    result_list = []
    for spec in input_spec:
        if spec.name is None:
            # name is None, the input_spec only can be InputSpec
            raise ValueError(name_none_error % spec)
        if spec.name not in input_var_names:
            # the input_spec can be `InputSpec` or `VarBase`
            raise ValueError(name_no_exists_error % spec.name)
        result_list.append(spec.name)
    return result_list


def _get_output_vars(outputs, output_spec):
    """Select the fetch variables passed on to ``save_inference_model``.

    Args:
        outputs: Iterable of program outputs. Only ``Variable`` instances
            are considered; they are indexed by ``name`` in first-seen order.
        output_spec: ``None`` or a list of objects exposing a ``name``
            attribute.

    Returns:
        list: Always a real ``list``. It holds all output variables when
        ``output_spec`` is ``None`` or has as many entries as there are
        output variables (no pruning); otherwise the variables named by
        ``output_spec``, in the order of ``output_spec``.

    Raises:
        ValueError: Only in the pruning case, when a spec names a variable
            that is not among ``outputs``.
    """
    name_no_exists_error = (
        "The tensor `%s` does not exists. "
        "Please make sure the name of example Tensor "
        "in configs.output_spec is the output tensor of "
        "Layer.forward method.")

    output_vars_dict = OrderedDict()
    for var in outputs:
        if isinstance(var, Variable):
            output_vars_dict[var.name] = var

    if output_spec is None:
        # materialize the view so every branch returns the same type
        return list(output_vars_dict.values())

    if len(output_spec) == len(output_vars_dict):
        # no prune: unknown names are only reported, all outputs are kept
        for var in output_spec:
            if var.name not in output_vars_dict:
                warnings.warn(name_no_exists_error % var.name)
        return list(output_vars_dict.values())

    # prune: keep exactly the requested outputs, in the requested order
    result_list = []
    for var in output_spec:
        if var.name not in output_vars_dict:
            raise ValueError(name_no_exists_error % var.name)
        result_list.append(output_vars_dict[var.name])
    return result_list
input check prog_translator = ProgramTranslator() if not prog_translator.enable: @@ -788,25 +836,58 @@ def save(layer, model_path, input_spec=None, config=None): if configs is None: configs = SaveLoadConfig() + # avoid change user given input_spec + inner_input_spec = None if input_spec is not None: if not isinstance(input_spec, list): raise TypeError( "The input input_spec should be 'list', but received input_spec's type is %s." % type(input_spec)) + inner_input_spec = [] for var in input_spec: - if not isinstance(var, (core.VarBase, Variable, - paddle.static.InputSpec)): + if isinstance(var, paddle.static.InputSpec): + inner_input_spec.append(var) + elif isinstance(var, (core.VarBase, Variable)): + inner_input_spec.append( + paddle.static.InputSpec.from_tensor(var)) + else: raise TypeError( "The element in input_spec list should be 'Variable' or `paddle.static.InputSpec`, but received element's type is %s." % type(var)) - # 2. get program of declarative Layer.forward - if not isinstance(layer.forward, StaticLayer): - raise RuntimeError( - "layer.forward need to be decorated by `@declarative`.") - concrete_program = layer.forward.concrete_program - - # NOTE: we maintain the mapping of variable name to + # 2. get program from Layer + # TODO(chenweihang): add support for other method, not only forward + if isinstance(layer.forward, StaticLayer): + concrete_program = layer.forward.concrete_program + else: + # transform in jit.save, if input_spec is incomplete, declarative will throw error + static_forward = declarative(layer.forward, input_spec=inner_input_spec) + concrete_program = static_forward.concrete_program + # the input_spec has been used in declarative, which is equal to + # @declarative with input_spec and jit.save without input_spec, + # avoid needless warning + inner_input_spec = None + + # 3. 
build input & output of save_inference_model + # NOTE(chenweihang): [ Get input variables name ] + # There are two cases, whether to prune the inputs or not + # - not prune inputs (recommended): + # - the len(input_spec) == len(concrete_program.inputs) - 1 + # - here can use concrete_program.inputs directly + # - prune inputs: + # - the input_spec length < len(concrete_program.inputs) - 1 + # - the input_spec's name should be in concrete_program.inputs + input_var_names = _get_input_var_names(concrete_program.inputs, + inner_input_spec) + + # NOTE(chenweihang): [ Get output variables ] + # the rule is like [ Get input variables name ]. For output var, + # we only support VarBase spec, and actually, we only need the + # var name of output, and we don't recommend using output_spec + output_vars = _get_output_vars(concrete_program.outputs, + configs.output_spec) + + # NOTE(chenweihang): we maintain the mapping of variable name to # structured name, the buffer variable (non-persistable) # saved to inference program may not be needed by dygraph Layer, # we only record the state_dict variable's structured name @@ -814,7 +895,7 @@ def save(layer, model_path, input_spec=None, config=None): for structured_name, var in six.iteritems(layer.state_dict()): state_names_dict[var.name] = structured_name - # 3. share parameters from Layer to scope & record var info + # 4. share parameters from Layer to scope & record var info scope = core.Scope() extra_var_info = dict() for param_or_buffer in concrete_program.parameters: @@ -832,10 +913,6 @@ def save(layer, model_path, input_spec=None, config=None): extra_info_dict['trainable'] = param_or_buffer.trainable extra_var_info[param_or_buffer.name] = extra_info_dict - # 4. build input & output spec - input_var_names = get_inout_spec(concrete_program.inputs, input_spec, True) - output_vars = get_inout_spec(concrete_program.outputs, configs.output_spec) - # 5. 
class LinearNetWithInputSpec(fluid.dygraph.Layer):
    """Single ``Linear`` layer whose ``forward`` declares its own InputSpec."""

    def __init__(self, in_size, out_size):
        super(LinearNetWithInputSpec, self).__init__()
        self._linear = Linear(in_size, out_size)

    @declarative(input_spec=[InputSpec(shape=[None, 784], dtype='float32')])
    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        projected = self._linear(x)
        return projected


class LinerNetWithLabel(paddle.nn.Layer):
    """Single ``Linear`` layer that also takes a label and returns a loss.

    ``forward`` declares two named inputs, ``image`` and ``label``.
    """

    def __init__(self, in_size, out_size):
        super(LinerNetWithLabel, self).__init__()
        self._linear = Linear(in_size, out_size)

    @declarative(input_spec=[
        InputSpec(
            shape=[None, 784], dtype='float32', name="image"),
        InputSpec(
            shape=[None, 1], dtype='int64', name="label"),
    ])
    def forward(self, x, label):
        """Return ``(linear output, mean cross-entropy against label)``."""
        prediction = self._linear(x)
        mean_loss = fluid.layers.mean(
            fluid.layers.cross_entropy(prediction, label))
        return prediction, mean_loss
class LinearNetMultiInput(fluid.dygraph.Layer):
    """Two independent ``Linear`` layers, one per input tensor."""

    def __init__(self, in_size, out_size):
        super(LinearNetMultiInput, self).__init__()
        self._linear1 = Linear(in_size, out_size)
        self._linear2 = Linear(in_size, out_size)

    @declarative(input_spec=[
        InputSpec(
            [None, 8], dtype='float32'),
        InputSpec(
            [None, 8], dtype='float32'),
    ])
    def forward(self, x, y):
        """Return both linear outputs and the mean of their sum."""
        first = self._linear1(x)
        second = self._linear2(y)
        return first, second, fluid.layers.mean(first + second)


class MultiLoadingLinearNet(fluid.dygraph.Layer):
    """Own ``Linear`` layer wrapped around two loads of the same saved model."""

    def __init__(self, size, model_path):
        super(MultiLoadingLinearNet, self).__init__()
        self._linear = Linear(size, size)
        # the same saved model is loaded twice on purpose
        self._load_linear1 = fluid.dygraph.jit.load(model_path)
        self._load_linear2 = fluid.dygraph.jit.load(model_path)

    @declarative
    def forward(self, x):
        """Run linear -> loaded model 1 -> loaded model 2 -> linear."""
        hidden = self._linear(x)
        hidden = self._load_linear1(hidden)
        hidden = self._load_linear2(hidden)
        return self._linear(hidden)


class LinearNetReturnHidden(fluid.dygraph.Layer):
    """Two stacked ``Linear`` layers; returns the hidden output and a loss."""

    def __init__(self, in_size, out_size):
        super(LinearNetReturnHidden, self).__init__()
        self._linear_1 = Linear(in_size, out_size)
        self._linear_2 = Linear(in_size, out_size)

    @declarative
    def forward(self, x):
        """Return ``(first-layer output, mean of second-layer output)``."""
        hidden = self._linear_1(x)
        mean_of_second = fluid.layers.mean(self._linear_2(hidden))
        return hidden, mean_of_second


def train_with_label(layer, input_size=784, label_size=1):
    """Train ``layer`` with SGD on batches from ``random_batch_reader``.

    ``layer`` is called as ``layer(img, label)`` and must return
    ``(out, avg_loss)``. Returns the ``out`` of the last batch.
    """
    # optimizer over all of the layer's parameters
    optimizer = fluid.optimizer.SGDOptimizer(
        learning_rate=0.01, parameter_list=layer.parameters())

    # data loader fed by the random batch generator
    loader = fluid.io.DataLoader.from_generator(capacity=5)
    batch_generator = random_batch_reader(input_size, label_size)
    loader.set_batch_generator(batch_generator)

    # training loop
    for img, label in loader():
        label.stop_gradient = True

        out, avg_loss = layer(img, label)

        avg_loss.backward()
        optimizer.minimize(avg_loss)
        layer.clear_gradients()
    return out
class TestJitSaveMultiCases(unittest.TestCase):
    """Covers ``paddle.jit.save`` across prune/no-prune, decorated/undecorated
    and trained/untrained combinations, plus the name warning/error paths."""

    def setUp(self):
        # enable dygraph mode
        fluid.enable_dygraph()
        # config seed
        paddle.manual_seed(SEED)
        paddle.framework.random._manual_program_seed(SEED)

    def verify_inference_correctness(self, layer, model_path, with_label=False):
        """Load ``model_path`` and check it predicts exactly like ``layer``.

        When ``with_label`` is True the original layer is called with an
        extra label input and its first output is compared; the loaded
        layer is always called with the image input only.
        """
        layer.eval()
        loaded_layer = paddle.jit.load(model_path)
        loaded_layer.eval()
        # inference & compare
        x = paddle.to_variable(np.random.random((1, 784)).astype('float32'))
        if with_label:
            y = paddle.to_variable(np.random.random((1, 1)).astype('int64'))
            pred, _ = layer(x, y)
            pred = pred.numpy()
        else:
            pred = layer(x).numpy()
        loaded_pred = loaded_layer(x).numpy()
        self.assertTrue(
            np.array_equal(pred, loaded_pred),
            msg="Result diff when load and inference:\nlayer result:\n{}\n" \
                "loaded layer result:\n{}".format(pred, loaded_pred))

    def test_no_prune_to_static_after_train(self):
        layer = LinearNet(784, 1)

        train(layer)

        model_path = "test_no_prune_to_static_after_train"
        paddle.jit.save(layer, model_path)

        self.verify_inference_correctness(layer, model_path)

    def test_no_prune_to_static_no_train(self):
        layer = LinearNetWithInputSpec(784, 1)

        model_path = "test_no_prune_to_static_no_train"
        paddle.jit.save(layer, model_path)

        self.verify_inference_correctness(layer, model_path)

    def test_no_prune_no_to_static_after_train(self):
        layer = LinearNetNotDeclarative(784, 1)

        train(layer)

        model_path = "test_no_prune_no_to_static_after_train"
        paddle.jit.save(
            layer,
            model_path,
            input_spec=[InputSpec(
                shape=[None, 784], dtype='float32')])

        self.verify_inference_correctness(layer, model_path)

    def test_no_prune_no_to_static_after_train_with_examples(self):
        layer = LinearNetNotDeclarative(784, 1)

        example_inputs, _, _ = train(layer)

        model_path = "test_no_prune_no_to_static_after_train_with_examples"
        fluid.dygraph.jit.save(
            layer=layer, model_path=model_path, input_spec=example_inputs)

        self.verify_inference_correctness(layer, model_path)

    def test_no_prune_no_to_static_no_train(self):
        layer = LinearNetNotDeclarative(784, 1)

        model_path = "test_no_prune_no_to_static_no_train"
        paddle.jit.save(
            layer,
            model_path,
            input_spec=[InputSpec(
                shape=[None, 784], dtype='float32')])

        self.verify_inference_correctness(layer, model_path)

    def test_prune_to_static_after_train(self):
        layer = LinerNetWithLabel(784, 1)

        out = train_with_label(layer)

        model_path = "test_prune_to_static_after_train"
        configs = paddle.SaveLoadConfig()
        configs.output_spec = [out]
        paddle.jit.save(
            layer,
            model_path,
            input_spec=[
                InputSpec(
                    shape=[None, 784], dtype='float32', name="image")
            ],
            configs=configs)

        self.verify_inference_correctness(layer, model_path, True)

    def test_prune_to_static_no_train(self):
        layer = LinerNetWithLabel(784, 1)

        model_path = "test_prune_to_static_no_train"
        configs = paddle.SaveLoadConfig()
        # TODO: no train, cannot get output_spec var here
        # now only can use index
        configs.output_spec = layer.forward.outputs[:1]
        paddle.jit.save(
            layer,
            model_path,
            input_spec=[
                InputSpec(
                    shape=[None, 784], dtype='float32', name="image")
            ],
            configs=configs)

        self.verify_inference_correctness(layer, model_path, True)

    def test_no_prune_input_spec_name_warning(self):
        layer = LinearNetWithInputSpec(784, 1)

        train(layer)

        model_path = "test_no_prune_input_spec_name_warning"
        # first save: spec without a name
        paddle.jit.save(
            layer,
            model_path,
            input_spec=[InputSpec(
                shape=[None, 784], dtype='float32')])
        # second save: spec with a name given only here, not in the decorator
        paddle.jit.save(
            layer,
            model_path,
            input_spec=[
                InputSpec(
                    shape=[None, 784], dtype='float32', name='feed_input')
            ])

        self.verify_inference_correctness(layer, model_path)

    def test_not_prune_output_spec_name_warning(self):
        layer = LinearNet(784, 1)

        train(layer)

        model_path = "test_not_prune_output_spec_name_warning"
        configs = paddle.SaveLoadConfig()
        # a fresh tensor that is not an output of the layer's forward
        out = paddle.to_variable(np.random.random((1, 1)).astype('float'))
        configs.output_spec = [out]
        paddle.jit.save(layer, model_path, configs=configs)

        self.verify_inference_correctness(layer, model_path)

    def test_prune_input_spec_name_error(self):
        layer = LinerNetWithLabel(784, 1)

        model_path = "test_prune_input_spec_name_error"
        # pruning with an unnamed spec must fail
        with self.assertRaises(ValueError):
            paddle.jit.save(
                layer,
                model_path,
                input_spec=[InputSpec(
                    shape=[None, 784], dtype='float32')])
        # pruning with a name that matches no declared input must fail
        with self.assertRaises(ValueError):
            paddle.jit.save(
                layer,
                model_path,
                input_spec=[
                    InputSpec(
                        shape=[None, 784], dtype='float32', name='feed_input')
                ])

    def test_prune_output_spec_name_error(self):
        layer = LinerNetWithLabel(784, 1)

        train_with_label(layer)

        # use this test's own path so it never touches the artifacts of
        # test_prune_to_static_after_train
        model_path = "test_prune_output_spec_name_error"
        configs = paddle.SaveLoadConfig()
        # a fresh tensor that is not an output of the layer's forward
        out = paddle.to_variable(np.random.random((1, 1)).astype('float'))
        configs.output_spec = [out]
        with self.assertRaises(ValueError):
            paddle.jit.save(
                layer,
                model_path,
                input_spec=[
                    InputSpec(
                        shape=[None, 784], dtype='float32', name="image")
                ],
                configs=configs)


if __name__ == '__main__':
    unittest.main()