diff --git a/paddle/fluid/operators/run_program_op.h b/paddle/fluid/operators/run_program_op.h index ae09e87473d23d16a5f73b42b4f5a4e8e641c0bc..830334043c4d703e7fafbb1565bd896da0264a16 100644 --- a/paddle/fluid/operators/run_program_op.h +++ b/paddle/fluid/operators/run_program_op.h @@ -49,13 -49,13 @@ static void CheckInputVarStatus(const Variable &var, var.IsType<LoDTensor>(), true, platform::errors::InvalidArgument( "The input variable %s of " - "RunProgram(Grad)Op(StaticModelRunner) holds " + "RunProgram(Grad)Op holds " "wrong type. Expect type is LoDTensor, but receive type is %s.", var_name, platform::demangle(framework::ToTypeName(var.Type())))); PADDLE_ENFORCE_EQ( var.Get<LoDTensor>().IsInitialized(), true, platform::errors::InvalidArgument("The tensor in input variable %s of " - "RunProgram(Grad)Op(StaticModelRunner) " + "RunProgram(Grad)Op " "is not initialized.", var_name)); } @@ -68,14 +68,14 @@ static void CheckOutputVarStatus(const Variable &src_var, src_var.IsType<LoDTensor>(), true, platform::errors::InvalidArgument( "The output variable %s get from " - "RunProgram(Grad)Op(StaticModelRunner)'s internal scope holds " + "RunProgram(Grad)Op's internal scope holds " "wrong type. Expect type is LoDTensor, but receive type is %s.", var_name, platform::demangle(framework::ToTypeName(src_var.Type())))); PADDLE_ENFORCE_EQ(src_var.Get<LoDTensor>().IsInitialized(), true, platform::errors::InvalidArgument( "The tensor in output variable %s get from " - "RunProgram(Grad)Op(StaticModelRunner)'s internal " + "RunProgram(Grad)Op's internal " "scope is not initialized.", var_name)); } else if (dst_var.IsType<SelectedRows>()) { @@ -83,20 +83,20 @@ static void CheckOutputVarStatus(const Variable &src_var, src_var.IsType<SelectedRows>(), true, platform::errors::InvalidArgument( "The output variable %s get from " - "RunProgram(Grad)Op(StaticModelRunner)'s internal scope holds " + "RunProgram(Grad)Op's internal scope holds " "wrong type. Expect type is SelectedRows, but receive type is %s.", var_name, platform::demangle(framework::ToTypeName(src_var.Type())))); PADDLE_ENFORCE_EQ(src_var.Get<SelectedRows>().value().IsInitialized(), true, platform::errors::InvalidArgument( "The tensor in output variable %s get from " - "RunProgram(Grad)Op(StaticModelRunner)'s " + "RunProgram(Grad)Op's " "internal scope is not initialized.", var_name)); } else { PADDLE_THROW(platform::errors::InvalidArgument( - "The RunProgram(Grad)Op(StaticModelRunner) only support output " + "The RunProgram(Grad)Op only supports output " "variable of type LoDTensor or SelectedRows, " "but received variable %s's type is %s", var_name, platform::demangle(framework::ToTypeName(dst_var.Type())))); @@ -143,7 +143,7 @@ static void ShareVarsFromScope(const std::vector<Variable *> &vars, auto *var = scope->FindVar(var_names[i]); PADDLE_ENFORCE_NOT_NULL( var, platform::errors::NotFound("The output variable %s is not in " - "RunProgram(Grad)Op(StaticModelRunner)'" + "RunProgram(Grad)Op'" "s internal scope.", var_names[i])); CheckOutputVarStatus(*var, *vars[i], var_names[i]); diff --git a/python/paddle/fluid/dygraph/__init__.py b/python/paddle/fluid/dygraph/__init__.py index 22de864dd696100cd7859e33ad935cd6bb10b9f5..f990d02342be78fe998cebfa40ed8b348cf54b2a 100644 --- a/python/paddle/fluid/dygraph/__init__.py +++ b/python/paddle/fluid/dygraph/__init__.py @@ -44,6 +44,9 @@ from .backward_strategy import * from . import jit from .jit import * +from . import io +from .io import * + from . 
import static_runner from .static_runner import StaticModelRunner @@ -63,5 +66,6 @@ __all__ += checkpoint.__all__ __all__ += learning_rate_scheduler.__all__ __all__ += backward_strategy.__all__ __all__ += jit.__all__ +__all__ += io.__all__ __all__ += rnn.__all__ __all__ += ['ProgramTranslator'] diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py b/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py index 5f1ed75735197606bc01ca31d27de5a128feca93..9701ebd7b4fccf21afa3af161a99b63fbe8f847b 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py @@ -36,6 +36,7 @@ from paddle.fluid.wrapped_decorator import signature_safe_contextmanager from paddle.fluid.dygraph.base import param_guard from paddle.fluid.data_feeder import check_type from paddle.fluid.dygraph.dygraph_to_static.partial_program import partial_program_from +from paddle.fluid.annotations import deprecated __all__ = ['ProgramTranslator', 'convert_to_static'] @@ -125,6 +126,9 @@ class FunctionSpec(object): self._args = args self._kwargs = kwargs + dyfunc = getattr(func, '__wrapped__', func) + self._dyfunc_code = inspect.getsource(dyfunc) + def is_method(self): return self._args and isinstance(self._args[0], layers.Layer) @@ -198,7 +202,9 @@ class FunctionSpec(object): # Note: if dygraph function is a method of class, # consider instance info as hash key. if self.is_method(): - return self._dyfunc, self._args[0] + # NOTE: we can use Layer's (instance + function code) as hash key. + # An instance will not hold two identical methods + return self._dyfunc_code, self._args[0] else: return self._dyfunc @@ -312,6 +318,17 @@ class ProgramCache(object): self._caches[item] = self._build_once(item) return self._caches[item] + def get_program(self, item): + if not isinstance(item, FunctionSpec): + raise ValueError( + "Input item's type should be FunctionSpec, but received %s" % + type(item)) + if item not in self._caches: + raise RuntimeError( + "Failed to find program for input item, please decorate input function by `@declarative`." + ) + return self._caches[item] + def last(self): assert len( self._caches) >= 1, "No valid cached program in ProgramCache." @@ -633,6 +650,7 @@ class ProgramTranslator(object): source_code = ast_to_source_code(root_wrapper.node) return source_code + @deprecated(since='2.0', instead="paddle.imperative.jit.save") @switch_to_static_graph def save_inference_model(self, dirname, feed=None, fetch=None): """ diff --git a/python/paddle/fluid/dygraph/io.py b/python/paddle/fluid/dygraph/io.py new file mode 100644 index 0000000000000000000000000000000000000000..38e4e517836ed8ddbeb36fb68a0c34fa9826f233 --- /dev/null +++ b/python/paddle/fluid/dygraph/io.py @@ -0,0 +1,772 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import os +import six +import pickle +import numpy as np + +from paddle import compat as cpt +from paddle.fluid import core +from paddle.fluid import framework +from paddle.fluid import backward +from paddle.fluid.dygraph import layers +from paddle.fluid.layers import nn +from paddle.fluid.dygraph.base import switch_to_static_graph + +__all__ = ['TranslatedLayer'] + +VARIABLE_FILENAME = "__variables__" +EXTRA_VAR_INFO_FILENAME = "__variables.info__" + + +def _load_program_desc(model_file_path): + # 1. parse program desc + with open(model_file_path, "rb") as f: + program_desc_str = f.read() + + program_desc = core.ProgramDesc(program_desc_str) + if not core._is_program_version_supported(program_desc._version()): + raise ValueError("Unsupported program version: %d\n" % + program_desc._version()) + + return program_desc + + +def _is_persistable(var_desc): + if var_desc.type() == core.VarDesc.VarType.FEED_MINIBATCH or \ + var_desc.type() == core.VarDesc.VarType.FETCH_LIST or \ + var_desc.type() == core.VarDesc.VarType.READER or \ + var_desc.type() == core.VarDesc.VarType.RAW: + return False + return var_desc.persistable() + + +def _is_parameter(persistable_var_desc, program_desc): + # 1. firstly, a parameter should be the input of some op + input_ops = [] # op can be repeated + for block_idx in six.moves.range(program_desc.num_blocks()): + block = program_desc.block(block_idx) + for op_idx in six.moves.range(block.op_size()): + op = block.op(op_idx) + # NOTE: parameter is the input of a certain op + if persistable_var_desc.name() in op.input_arg_names(): + input_ops.append(op) + # 2. secondly, a parameter should not be the output of any op, unless + # that op also takes it as input (such as batch_norm_op) + for block_idx in six.moves.range(program_desc.num_blocks()): + block = program_desc.block(block_idx) + for op_idx in six.moves.range(block.op_size()): + op = block.op(op_idx) + if persistable_var_desc.name() in op.output_arg_names(): + # such as batch_norm_op + if op in input_ops: + continue + else: + return False + return True + + +def _get_persistable_vars(program_desc): + persistable_vars = [] + for i in six.moves.range(program_desc.num_blocks()): + block = program_desc.block(i) + persistable_vars.extend(list(filter(_is_persistable, block.all_vars()))) + return persistable_vars + + +def _get_persistable_var_names(program_desc): + """ + Get all persistable variable names in ProgramDesc. + """ + var_names = [] + persistable_vars = _get_persistable_vars(program_desc) + for var in persistable_vars: + var_names.append(var.name()) + return var_names + + +def _get_all_var_names(program_desc): + all_var_names = set() + for i in six.moves.range(program_desc.num_blocks()): + block = program_desc.block(i) + for var in block.all_vars(): + all_var_names.add(var.name()) + return all_var_names + + +def _append_loaded_suffix(name): + """ + Append the loaded suffix to the given variable name, + e.g. x ==> x@LOADED + """ + suffix = core.loaded_var_suffix() + name = cpt.to_text(name) + if suffix not in name: + name = name + suffix + return name + + +def _remove_loaded_suffix(name): + """ + Remove the loaded suffix from the given variable name, + e.g.
x@LOADED ==> x + """ + suffix = core.loaded_var_suffix() + name = cpt.to_text(name) + return name.replace(suffix, '') + + +def _append_loaded_suffix_to_var(program_desc): + persistable_vars = _get_persistable_vars(program_desc) + for var_desc in persistable_vars: + old_name = var_desc.name() + new_name = _append_loaded_suffix(var_desc.name()) + var_desc.set_name(new_name) + for block_idx in six.moves.range(program_desc.num_blocks()): + block = program_desc.block(block_idx) + for op_idx in six.moves.range(block.op_size()): + op = block.op(op_idx) + op._rename_input(old_name, new_name) + op._rename_output(old_name, new_name) + + +@switch_to_static_graph +def _build_program_by_desc(program_desc): + prog = framework.Program() + prog.desc = program_desc + prog.blocks = [ + framework.Block(prog, i) + for i in six.moves.range(prog.desc.num_blocks()) + ] + prog._sync_with_cpp() + return prog + + +def _change_is_test_status(program_desc, is_test): + # change all `is_test` attributes + for i in six.moves.range(program_desc.num_blocks()): + block = program_desc.block(i) + for j in six.moves.range(block.op_size()): + op = block.op(j) + if op.has_attr('is_test'): + op._set_attr('is_test', is_test) + + +class _ProgramHolder(object): + """ + Holds the execution information of a Program. + + _ProgramHolder is the execution unit of TranslatedLayer; + if a TranslatedLayer contains multiple _ProgramHolders, + it can execute multiple methods. + + _ProgramHolder is an internal concept. + """ + + def __init__(self, program_desc): + super(_ProgramHolder, self).__init__() + + # input, output, persistable var info + self._input_names = [] + self._persistable_names = [] + self._output_descs = [] + + # execution scope + self._inner_scope = core.Scope() + + # forward program + self._infer_program_desc = self._preprocess(program_desc) + # forward + backward program + self._train_program_desc = self._append_backward_desc( + self._infer_program_desc) + + @property + def infer_program(self): + return self._infer_program_desc + + @property + def train_program(self): + return self._train_program_desc + + @property + def input_names(self): + return self._input_names + + @property + def output_descs(self): + return self._output_descs + + @property + def persistable_names(self): + return self._persistable_names + + @property + def scope(self): + return self._inner_scope + + def _preprocess(self, program_desc): + # 1.
Prune original program + # remove feed, fetch and scale-1 op, remove op_callstack attr + ops_to_remove = [] + root_block = program_desc.block(0) + for i in six.moves.range(root_block.op_size()): + op = root_block.op(i) + if op.type() == 'feed': + ops_to_remove.append(i) + feed_var_name = cpt.to_bytes(op.input('X')[0]) + root_block._remove_var(feed_var_name) + self._input_names.append(cpt.to_bytes(op.output('Out')[0])) + elif op.type() == 'scale' and op.output('Out')[0].startswith( + 'save_infer_model/scale_'): + ops_to_remove.append(i) + out_var_name = cpt.to_bytes(op.output('Out')[0]) + root_block._remove_var(out_var_name) + self._output_descs.append( + root_block.find_var(cpt.to_bytes(op.input('X')[0]))) + elif op.type() == 'fetch': + ops_to_remove.append(i) + fetch_var_name = cpt.to_bytes(op.output('Out')[0]) + root_block._remove_var(fetch_var_name) + # NOTE: some old pre-train models have no extra scale_op + if not op.input('X')[0].startswith('save_infer_model/scale_'): + self._output_descs.append( + root_block.find_var(cpt.to_bytes(op.input('X')[0]))) + else: + if op.has_attr("op_callstack"): + op.remove_attr("op_callstack") + + for op_idx in reversed(ops_to_remove): + root_block._remove_op(op_idx, op_idx + 1) + + # 2. Input processing, reverse feed vars + self._input_names.reverse() + + # 3. Output processing, add scale for outputs + tmp_program = _build_program_by_desc(program_desc) + # NOTE: [why we need to append scale ops for outputs] + # Some more complex pre-trained models have multiple fetch outputs, + # and several of these outputs may lie on the same branch of the + # graph. Which branches the user will attach subsequent operations + # to is unknown to TranslatedLayer at initialization time, so the + # gradient accumulation operations required on an output node in + # the middle of a branch would not be performed, causing errors. + # Appending a scale op to each output avoids this; for details see + # pull request: + # [https://github.com/PaddlePaddle/Paddle/pull/24627] + self._append_scale_to_output(tmp_program) + + # 4. Persistable vars processing + # - append @LOADED suffix to persistable vars + # NOTE: [why we need to append a suffix to persistable vars] + # Dygraph and static graph mode use the same naming mechanism. + # If users load a model for fine-tuning, they may add existing + # Layers to the loaded model to enhance the network. For example, + # the saved model already has a linear layer, and a new linear + # layer is added after loading. This creates duplicate names, so + # we uniformly append the @LOADED suffix to the parameters of the + # loaded model. To avoid appending the @LOADED suffix multiple + # times, we only append it to variable names that do not already + # contain it. + _append_loaded_suffix_to_var(program_desc) + # - get persistable var + self._persistable_names = _get_persistable_var_names(program_desc) + + return program_desc + + @switch_to_static_graph + def _append_scale_to_output(self, program): + # 1.
append scale & save var + scale_output_vars = [] + with framework.program_guard(program): + for i, out in enumerate(self._output_descs): + var = program.global_block().var(out.name()) + var = nn.scale( + var, 1., name="static_model_runner/scale_{}".format(i)) + scale_output_vars.append(var) + # 2. update output names & descs + for i, var in enumerate(scale_output_vars): + self._output_descs[i] = var.desc + + @switch_to_static_graph + def _append_backward_desc(self, infer_program_desc): + program_desc_copy = core.ProgramDesc(infer_program_desc) + + # 1. set all `is_test` attributes to False + _change_is_test_status(program_desc_copy, False) + + # 2. prepare program and related var + # NOTE: To reuse the backward interfaces, build a Program first. + # In principle, building a program is unnecessary here, but appending + # backward directly to a program_desc would require rewriting almost + # all of append_backward. Therefore, to reuse the methods of + # backward.py, we build the Program here. + program = _build_program_by_desc(program_desc_copy) + + targets = [] + for out in self._output_descs: + targets.append(program.global_block().var(out.name())) + + # 3. append backward + backward.gradients(targets=targets, inputs=[]) + return program.desc + + +# [ TranslatedLayer : Run program in imperative mode ] +# +# DESIGN IDEA: use a special operator, `RunProgram`, to execute the program inside the operator. +# +# Op's Inputs: +# - the input variables fed by the user +# - the necessary parameters of the network +# Op's Outputs: +# - the fetched output variables +# +# This op receives a complete program desc; internally it creates a scope +# and an executor and executes the program. Key points: +# +# 1. Data Sharing: +# The VarBase of the dynamic graph is not in the scope, so before the op +# executes the program internally, we create persistable variables with the +# same names as the feed, parameter, and fetch variables in the scope, and share the +# LoDTensor of the op input. +# +# 2. Forward and Backward Separation: +# Because the dynamic graph op performs the forward and backward separately, +# in the forward op RunProgram, we only execute the forward part of the whole program, +# and in the backward op RunProgramGrad, we execute the backward part of the program. +# We cannot split the program into separate forward and backward programs, +# because that would break some control flow execution logic.
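+# For illustration, a rough sketch (not the exact implementation) of how one +# forward call is traced through the `run_program` op; the names `input_vars`, +# `persistable_vars`, `output_vars`, `tmp_scope_vec` and `forward_op_count` are +# placeholders prepared by the caller: +# +# framework._dygraph_tracer().trace_op( +# type='run_program', +# inputs={'X': input_vars, 'Params': persistable_vars}, +# outputs={'Out': output_vars, 'OutScope': tmp_scope_vec}, +# attrs={'global_block': program_desc.block(0), +# 'start_op_index': 0, +# 'end_op_index': forward_op_count, # forward part only +# 'is_test': is_test})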
+ + +# NOTE: [compatible] deal with models saved by save_inference_model, +# which need to get var info from the program desc +def _load_persistable_vars_by_program(model_path, + program_holder, + params_filename=None): + # make sure the path has been checked + persistable_vars = _get_persistable_vars(program_holder.infer_program) + load_var_dict = {} + for each_var in persistable_vars: + orig_each_name = _remove_loaded_suffix(each_var.name()) + if _is_parameter(each_var, program_holder.infer_program): + # create output varbase + new_var = framework.ParamBase( + shape=each_var.shape(), + dtype=each_var.dtype(), + name=each_var.name(), + type=each_var.type(), + persistable=True) + else: + new_var = framework._varbase_creator( + type=each_var.type(), + name=each_var.name(), + shape=each_var.shape(), + dtype=each_var.dtype(), + persistable=True) + if params_filename is None: + framework._dygraph_tracer().trace_op( + type='load', + inputs={}, + outputs={'Out': new_var}, + attrs={'file_path': os.path.join(model_path, orig_each_name)}) + new_var.stop_gradient = False + load_var_dict[each_var.name()] = new_var + + if params_filename is not None: + load_var_list = [] + for name in sorted(load_var_dict.keys()): + load_var_list.append(load_var_dict[name]) + + framework._dygraph_tracer().trace_op( + type='load_combine', + inputs={}, + outputs={'Out': load_var_list}, + attrs={'file_path': os.path.join(model_path, params_filename)}) + + for each_var in persistable_vars: + if not _is_parameter(each_var, program_holder.infer_program): + continue + param = load_var_dict[each_var.name()] + param.stop_gradient = False + + # NOTE: [Recover stop_gradient information based on the program] + # After loading the model, the stop_gradient information + # of the original variable is lost, but if a parameter does not + # have a corresponding @GRAD variable in the backward program, + # it can be treated as stop_gradient as well + all_var_names = _get_all_var_names(program_holder.train_program) + for var_name in load_var_dict: + grad_var_name = var_name + core.grad_var_suffix() + if grad_var_name not in all_var_names: + load_var_dict[var_name].stop_gradient = True + + return load_var_dict + + +def _load_persistable_vars(model_path, + var_info_path, + separate_params=False, + params_filename=None): + # 1. load extra var info + with open(var_info_path, 'rb') as f: + extra_var_info = pickle.load(f) if six.PY2 else pickle.load( + f, encoding='latin1') + + # 2. construct var dict + load_var_dict = dict() + load_var_list = [] + # NOTE: some vars may not be Parameters + for name in sorted(extra_var_info): + # append suffix, see [why we need to append a suffix to persistable vars] + new_name = _append_loaded_suffix(name) + # create output varbase + if extra_var_info[name].get('trainable', None) is not None: + # use default shape and dtype + new_var = framework.ParamBase( + shape=[1], # only to pass check, this shape is not meaningful + dtype=core.VarDesc.VarType.FP32, + name=new_name, + persistable=True) + else: + new_var = framework._varbase_creator( + name=new_name, persistable=True) + + # load separate vars + if separate_params is True: + framework._dygraph_tracer().trace_op( + type='load', + inputs={}, + outputs={'Out': new_var}, + attrs={'file_path': os.path.join(model_path, name)}) + + new_var.stop_gradient = extra_var_info[name]['stop_gradient'] + load_var_dict[new_name] = new_var + load_var_list.append(new_var) + + # 3.
load all vars + if separate_params is False: + if params_filename is not None: + var_file_path = os.path.join(model_path, params_filename) + else: + var_file_path = os.path.join(model_path, VARIABLE_FILENAME) + framework._dygraph_tracer().trace_op( + type='load_combine', + inputs={}, + outputs={'Out': load_var_list}, + attrs={'file_path': var_file_path}) + + return load_var_dict + + +def _construct_program_holders(model_path, model_filename=None): + # make sure the path has been checked + program_holder_dict = dict() + + if model_filename is not None: + # [compatible] if model_filename is assigned, only one program can be loaded, as Layer.forward + model_filename = os.path.basename(model_filename) + model_file_path = os.path.join(model_path, model_filename) + program_holder_dict['forward'] = _ProgramHolder( + _load_program_desc(model_file_path)) + else: + for _, _, file_names in os.walk(model_path): + for name in file_names: + if 'model' in name: + model_file_path = os.path.join(model_path, name) + method_name = name.strip('_') + if method_name == 'model': + method_name = 'forward' + else: + method_name = method_name.replace('model', '') + program_holder_dict[method_name] = _ProgramHolder( + _load_program_desc(model_file_path)) + + return program_holder_dict + + +def _construct_params_and_buffers(model_path, + programs, + separate_params=False, + params_filename=None): + var_info_path = os.path.join(model_path, EXTRA_VAR_INFO_FILENAME) + if os.path.exists(var_info_path): + var_dict = _load_persistable_vars(model_path, var_info_path, + separate_params, params_filename) + else: + var_dict = _load_persistable_vars_by_program( + model_path, programs['forward'], params_filename) + return var_dict + + +class TranslatedLayer(layers.Layer): + """ + TranslatedLayer is an imperative Layer that holds the model loaded by + :ref:`api_imperative_jit_load` . It can be used like a general Layer + object in eval or train mode. + + .. note:: + A TranslatedLayer object should not be created by its constructor; it can only be loaded and constructed by :ref:`api_imperative_jit_load` . + + Examples: + .. code-block:: python + + import numpy as np + import paddle.fluid as fluid + from paddle.fluid.dygraph import Linear + from paddle.fluid.dygraph import declarative + + BATCH_SIZE = 32 + BATCH_NUM = 20 + + def random_batch_reader(): + def _get_random_images_and_labels(image_shape, label_shape): + image = np.random.random(size=image_shape).astype('float32') + label = np.random.random(size=label_shape).astype('int64') + return image, label + + def __reader__(): + for _ in range(BATCH_NUM): + batch_image, batch_label = _get_random_images_and_labels( + [BATCH_SIZE, 784], [BATCH_SIZE, 1]) + yield batch_image, batch_label + + return __reader__ + + class LinearNet(fluid.dygraph.Layer): + def __init__(self, in_size, out_size): + super(LinearNet, self).__init__() + self._linear = Linear(in_size, out_size) + + @declarative + def forward(self, x): + return self._linear(x) + + # enable dygraph mode + fluid.enable_dygraph() + + # 1. train & save model.
+ # create network + net = LinearNet(784, 1) + adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters()) + # create data loader + train_loader = fluid.io.DataLoader.from_generator(capacity=5) + train_loader.set_batch_generator(random_batch_reader()) + # train + for data in train_loader(): + img, label = data + label.stop_gradient = True + + cost = net(img) + + loss = fluid.layers.cross_entropy(cost, label) + avg_loss = fluid.layers.mean(loss) + + avg_loss.backward() + adam.minimize(avg_loss) + net.clear_gradients() + + model_path = "linear.example.model" + fluid.dygraph.jit.save( + layer=net, + model_path=model_path, + input_spec=[img]) + + # 2. load model as TranslatedLayer + translated_layer = fluid.dygraph.jit.load(model_path) + # inference + translated_layer.eval() + x = fluid.dygraph.to_variable(np.random.random((1, 784)).astype('float32')) + pred = translated_layer(x) + # fine-tune + translated_layer.train() + adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=translated_layer.parameters()) + train_loader = fluid.io.DataLoader.from_generator(capacity=5) + train_loader.set_batch_generator(random_batch_reader()) + for data in train_loader(): + img, label = data + label.stop_gradient = True + + cost = translated_layer(img) + + loss = fluid.layers.cross_entropy(cost, label) + avg_loss = fluid.layers.mean(loss) + + avg_loss.backward() + adam.minimize(avg_loss) + translated_layer.clear_gradients() + """ + + def __init__(self, programs, persistable_vars): + super(TranslatedLayer, self).__init__() + + if not isinstance(programs, dict): + raise TypeError( + "TranslatedLayer needs a dict of _ProgramHolder for initialization." + ) + if not isinstance(persistable_vars, dict): + raise TypeError( + "TranslatedLayer needs a dict of persistable variables for initialization." + ) + + self._program_holder_dict = programs + + for name, var in persistable_vars.items(): + if isinstance(var, framework.ParamBase): + self.add_parameter(name, var) + elif isinstance(var, core.VarBase): + self.register_buffer(name, var) + else: + raise TypeError( + "Adding this kind of persistable variable to a Layer is not supported now" + ) + + self._is_test = True + + @staticmethod + @framework.dygraph_only + def _construct(model_path, configs=None): + # 0. dir and filename check + model_path = os.path.normpath(model_path) + if not os.path.isdir(model_path): + raise ValueError("There is no directory named '%s'" % model_path) + model_filename = None + params_filename = None + separate_params = False + if configs is not None: + model_filename = configs.model_filename + params_filename = configs.params_filename + separate_params = configs.separate_params + + # 1. load program desc & construct _ProgramHolder + programs = _construct_program_holders(model_path, model_filename) + + # 2. load layer parameters & parameter attributes + persistable_vars = _construct_params_and_buffers( + model_path, programs, separate_params, params_filename) + + # 3. construct TranslatedLayer object + translated_layer = TranslatedLayer(programs, persistable_vars) + + # 4. create TranslatedLayer's execution method + for method_name, program_holder in programs.items(): + setattr(TranslatedLayer, method_name, + TranslatedLayer._execution_method_creator(method_name, + program_holder)) + + # 5. set TranslatedLayer's default mode to eval + translated_layer.eval() + + return translated_layer + + @staticmethod + def _execution_method_creator(method_name, program_holder): + def __impl__(self, *input): + # 1.
prepare inputs, outputs, attrs + input_vars = [] + for i, value in enumerate(input): + if not isinstance(value, (np.ndarray, core.VarBase)): + raise TypeError( + "The type of input in TranslatedLayer must be numpy array or Variable(VarBase), but received %s." + % type(value)) + # NOTE: In order to unify the API, firstly convert the input to VarBase + if isinstance(value, np.ndarray): + var = core.VarBase( + value=value, + name=program_holder.input_names[i], + persistable=False, + place=framework._current_expected_place(), + zero_copy=True) + else: + var = value + # NOTE: we changed the var name here, + # but it may be an important name set by the user + var.name = program_holder.input_names[i] + input_vars.append(var) + + persistable_vars = [] + for var_name in program_holder.persistable_names: + if var_name in self._parameters: + persistable_vars.append(self._parameters[var_name]) + elif var_name in self._buffers: + persistable_vars.append(self._buffers[var_name]) + else: + raise ValueError( + "The persistable variable %s does not exist in the current TranslatedLayer." + % var_name) + + output_vars = [] + for var_desc in program_holder.output_descs: + var = core.VarBase(var_desc.dtype(), + var_desc.shape(), + var_desc.name(), var_desc.type(), False) + output_vars.append(var) + + # hold forward variables + tmp_scope_vec = core.VarBase(core.VarDesc.VarType.FP32, [], + "program_out_scope", + core.VarDesc.VarType.STEP_SCOPES, True) + tmp_scope_vec.value().set_scope(program_holder.scope) + + # 2. run program by op + trace_program = program_holder.infer_program if self._is_test else program_holder.train_program + end_op_index = program_holder.infer_program.block(0).op_size() + framework._dygraph_tracer().trace_op( + type='run_program', + inputs={'X': input_vars, + 'Params': persistable_vars}, + outputs={'Out': output_vars, + 'OutScope': tmp_scope_vec}, + attrs={ + 'global_block': trace_program.block(0), + 'start_op_index': 0, + 'end_op_index': end_op_index, + 'is_test': self._is_test + }) + + # NOTE: [ why we need to set the param's gradient type here ] + # If the user sets sparse gradient mode, the param's gradient + # will be SelectedRows, not LoDTensor. But the tracer will just + # set the param grad VarBase from the forward VarBase (LoDTensor). + # If we don't change the grad_var type here, RunProgramOp needs to + # forcibly transform SelectedRows to LoDTensor, which may not be + # the result the user wants. + for persistable_var in persistable_vars: + grad_var_name = persistable_var.name + core.grad_var_suffix() + grad_var = trace_program.block(0).find_var( + cpt.to_bytes(grad_var_name)) + # NOTE: a missing grad var desc is not necessarily a problem, + # e.g. in batch_norm + if grad_var is None: + continue + persistable_var._set_grad_type(grad_var.type()) + + # 3.
prepare output, keeping the same form as the inputs + outs = output_vars + if len(output_vars) == 1: + outs = output_vars[0] + return outs + + __impl__.__name__ = method_name + return __impl__ + + def train(self): + self._is_test = False + + def eval(self): + self._is_test = True diff --git a/python/paddle/fluid/dygraph/jit.py b/python/paddle/fluid/dygraph/jit.py index 0dd369ca0a2aab00cafa2d849c318e7db0af83be..bd468b55d812e76841cd946d30e5e9a9503c2a65 100644 --- a/python/paddle/fluid/dygraph/jit.py +++ b/python/paddle/fluid/dygraph/jit.py @@ -14,17 +14,22 @@ from __future__ import print_function -__all__ = ['TracedLayer', 'declarative', 'dygraph_to_static_func'] +import os +import six +import pickle import warnings from paddle.fluid import core from paddle.fluid.compiler import CompiledProgram from paddle.fluid.dygraph.base import program_desc_tracing_guard, switch_to_static_graph -from paddle.fluid.dygraph.dygraph_to_static.program_translator import ProgramTranslator +from paddle.fluid.dygraph.dygraph_to_static.program_translator import ProgramTranslator, FunctionSpec from paddle.fluid.dygraph.layers import Layer from paddle.fluid.executor import Executor, scope_guard -from paddle.fluid.framework import Program, Block, Variable, _dygraph_tracer, dygraph_only, _dygraph_guard, _current_expected_place, in_dygraph_mode +from paddle.fluid.framework import Program, Block, Variable, ParamBase, _dygraph_tracer, dygraph_only, _dygraph_guard, _current_expected_place, in_dygraph_mode from paddle.fluid.wrapped_decorator import wrap_decorator +from paddle.fluid.dygraph.io import TranslatedLayer, VARIABLE_FILENAME, EXTRA_VAR_INFO_FILENAME + +__all__ = ['TracedLayer', 'declarative', 'dygraph_to_static_func'] def create_program_from_desc(program_desc): @@ -166,6 +171,802 @@ def _declarative_(dygraph_func): declarative = wrap_decorator(_declarative_) +class SaveLoadConfig(object): + """ + Additional configuration options used in the function + :ref:`api_imperative_jit_save`, which saves a :ref:`api_imperative_TranslatedLayer`, + or in the function :ref:`api_imperative_jit_load`, which + loads a :ref:`api_imperative_TranslatedLayer` . + + Examples: + 1. Using ``SaveLoadConfig`` when saving a model + + .. code-block:: python + + import numpy as np + import paddle.fluid as fluid + from paddle.fluid.dygraph import Linear + from paddle.fluid.dygraph import declarative + + class SimpleNet(fluid.dygraph.Layer): + def __init__(self, in_size, out_size): + super(SimpleNet, self).__init__() + self._linear = Linear(in_size, out_size) + + @declarative + def forward(self, x): + y = self._linear(x) + z = self._linear(y) + return z + + # enable dygraph mode + fluid.enable_dygraph() + + # train model + net = SimpleNet(8, 8) + adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters()) + x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) + for i in range(10): + out = net(x) + loss = fluid.layers.mean(out) + loss.backward() + adam.minimize(loss) + net.clear_gradients() + + # use SaveLoadConfig when saving model + model_path = "simplenet.example.model" + configs = fluid.dygraph.jit.SaveLoadConfig() + configs.model_filename = "__simplenet__" + fluid.dygraph.jit.save( + layer=net, + model_path=model_path, + input_spec=[x], + configs=configs) + + 2. Using ``SaveLoadConfig`` when loading a model + + ..
code-block:: python + + import numpy as np + import paddle.fluid as fluid + + # enable dygraph mode + fluid.enable_dygraph() + + # use SaveLoadConfig when loading model + model_path = "simplenet.example.model" + configs = fluid.dygraph.jit.SaveLoadConfig() + configs.model_filename = "__simplenet__" + infer_net = fluid.dygraph.jit.load(model_path, configs=configs) + # inference + x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) + pred = infer_net(x) + """ + + def __init__(self): + self._output_spec = None + self._model_filename = None + self._params_filename = None + self._separate_params = False + + # NOTE: Users rarely use the following configs, so they are not exposed to users, + # reducing the learning cost, but we retain the configuration capability + + # If True, programs are modified to only support direct inference deployment. + # Otherwise, more information will be stored for flexible optimization and re-training. + # Currently, only True is supported + self._export_for_deployment = True + + # If True, only the inference program is saved, without the params of the Program + self._program_only = False + + @property + def output_spec(self): + """ + Selects the output targets of the saved model ( :ref:`api_imperative_TranslatedLayer` ). + By default, all return variables of the original Layer's forward function + are kept as the output of the saved TranslatedLayer. + + The ``output_spec`` type should be list[Variable]. If the provided ``output_spec`` + list does not contain all output variables, the saved model will be pruned according to the + given ``output_spec`` list. + + .. note:: + The ``output_spec`` is only used when saving a model. + + Examples: + .. code-block:: python + + import numpy as np + import paddle.fluid as fluid + from paddle.fluid.dygraph import Linear + from paddle.fluid.dygraph import declarative + + class SimpleNet(fluid.dygraph.Layer): + def __init__(self, in_size, out_size): + super(SimpleNet, self).__init__() + self._linear = Linear(in_size, out_size) + + @declarative + def forward(self, x): + y = self._linear(x) + z = self._linear(y) + loss = fluid.layers.mean(z) + return z, loss + + # enable dygraph mode + fluid.enable_dygraph() + + # train model + net = SimpleNet(8, 8) + adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters()) + x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) + for i in range(10): + out, loss = net(x) + loss.backward() + adam.minimize(loss) + net.clear_gradients() + + # use SaveLoadConfig.output_spec + model_path = "simplenet.example.model.output_spec" + configs = fluid.dygraph.jit.SaveLoadConfig() + # only keep the predicted output in saved model, discard loss + configs.output_spec = [out] + + fluid.dygraph.jit.save( + layer=net, + model_path=model_path, + input_spec=[x], + configs=configs) + + infer_net = fluid.dygraph.jit.load(model_path, configs=configs) + x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) + # only have the predicted output + pred = infer_net(x) + """ + return self._output_spec + + @output_spec.setter + def output_spec(self, spec): + if not isinstance(spec, list): + raise TypeError( + "The SaveLoadConfig.output_spec should be 'list', but received input type is %s." + % type(spec)) + for var in spec: + if not isinstance(var, core.VarBase): + raise TypeError( + "The element in SaveLoadConfig.output_spec list should be 'Variable', but received element's type is %s."
+ % type(var)) + self._output_spec = spec + + @property + def model_filename(self): + """ + The name of the file to save the translated program of the target Layer. + Default filename is :code:`__model__` . + + Examples: + .. code-block:: python + + import numpy as np + import paddle.fluid as fluid + from paddle.fluid.dygraph import Linear + from paddle.fluid.dygraph import declarative + + class SimpleNet(fluid.dygraph.Layer): + def __init__(self, in_size, out_size): + super(SimpleNet, self).__init__() + self._linear = Linear(in_size, out_size) + + @declarative + def forward(self, x): + y = self._linear(x) + z = self._linear(y) + return z + + # enable dygraph mode + fluid.enable_dygraph() + + # train model + net = SimpleNet(8, 8) + adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters()) + x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) + for i in range(10): + out = net(x) + loss = fluid.layers.mean(out) + loss.backward() + adam.minimize(loss) + net.clear_gradients() + + model_path = "simplenet.example.model.model_filename" + configs = fluid.dygraph.jit.SaveLoadConfig() + configs.model_filename = "__simplenet__" + + # saving with configs.model_filename + fluid.dygraph.jit.save( + layer=net, + model_path=model_path, + input_spec=[x], + configs=configs) + # [result] the saved model directory contains: + # __simplenet__ __variables__ __variables.info__ + + # loading with configs.model_filename + infer_net = fluid.dygraph.jit.load(model_path, configs=configs) + x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) + pred = infer_net(x) + """ + return self._model_filename + + @model_filename.setter + def model_filename(self, filename): + if not isinstance(filename, six.string_types): + raise TypeError( + "The SaveLoadConfig.model_filename should be str, but received input's type is %s." + % type(filename)) + if len(filename) == 0: + raise ValueError( + "The SaveLoadConfig.model_filename is an empty string.") + self._model_filename = filename + + @property + def params_filename(self): + """ + The name of the file to save all persistable variables of the target Layer. + Default file name is :code:`__variables__` . + + Examples: + ..
code-block:: python + + import numpy as np + import paddle.fluid as fluid + from paddle.fluid.dygraph import Linear + from paddle.fluid.dygraph import declarative + + class SimpleNet(fluid.dygraph.Layer): + def __init__(self, in_size, out_size): + super(SimpleNet, self).__init__() + self._linear = Linear(in_size, out_size) + + @declarative + def forward(self, x): + y = self._linear(x) + z = self._linear(y) + return z + + # enable dygraph mode + fluid.enable_dygraph() + + # train model + net = SimpleNet(8, 8) + adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters()) + x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) + for i in range(10): + out = net(x) + loss = fluid.layers.mean(out) + loss.backward() + adam.minimize(loss) + net.clear_gradients() + + model_path = "simplenet.example.model.params_filename" + configs = fluid.dygraph.jit.SaveLoadConfig() + configs.params_filename = "__params__" + + # saving with configs.params_filename + fluid.dygraph.jit.save( + layer=net, + model_path=model_path, + input_spec=[x], + configs=configs) + # [result] the saved model directory contains: + # __model__ __params__ __variables.info__ + + # loading with configs.params_filename + infer_net = fluid.dygraph.jit.load(model_path, configs=configs) + x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) + pred = infer_net(x) + """ + return self._params_filename + + @params_filename.setter + def params_filename(self, filename): + if not isinstance(filename, six.string_types): + raise TypeError( + "The SaveLoadConfig.params_filename should be str, but received input's type is %s." + % type(filename)) + if len(filename) == 0: + raise ValueError( + "The SaveLoadConfig.params_filename is an empty string.") + self._params_filename = filename + + # NOTE: [why not use params_filename=None to control saving params separately] + # The new save interface does not recommend saving parameters separately. + # Here, the concepts should be separated as clearly as possible. + # Setting params_filename=None only sets the saved file name and has + # no other meaning. A new separate_params flag for saving files + # separately makes the concept clearer. + @property + def separate_params(self): + """ + Configure whether to save the Layer parameters as separate files. + (In order to be compatible with the behavior of :ref:`api_fluid_io_save_inference_model` ) + + If True, each parameter is saved to a separate file named after the parameter, + and the SaveLoadConfig.params_filename configuration will not take effect. Default False. + + Examples: + ..
code-block:: python + + import numpy as np + import paddle.fluid as fluid + from paddle.fluid.dygraph import Linear + from paddle.fluid.dygraph import declarative + + class SimpleNet(fluid.dygraph.Layer): + def __init__(self, in_size, out_size): + super(SimpleNet, self).__init__() + self._linear = Linear(in_size, out_size) + + @declarative + def forward(self, x): + y = self._linear(x) + z = self._linear(y) + return z + + # enable dygraph mode + fluid.enable_dygraph() + + # train model + net = SimpleNet(8, 8) + adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters()) + x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) + for i in range(10): + out = net(x) + loss = fluid.layers.mean(out) + loss.backward() + adam.minimize(loss) + net.clear_gradients() + + model_path = "simplenet.example.model.separate_params" + configs = fluid.dygraph.jit.SaveLoadConfig() + configs.separate_params = True + + # saving with configs.separate_params + fluid.dygraph.jit.save( + layer=net, + model_path=model_path, + input_spec=[x], + configs=configs) + # [result] the saved model directory contains: + # linear_0.b_0 linear_0.w_0 __model__ __variables.info__ + + # loading with configs.separate_params + infer_net = fluid.dygraph.jit.load(model_path, configs=configs) + x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) + pred = infer_net(x) + """ + return self._separate_params + + @separate_params.setter + def separate_params(self, value): + if not isinstance(value, bool): + raise TypeError( + "The SaveLoadConfig.separate_params should be a bool value, but received input's type is %s." + % type(value)) + self._separate_params = value + + +@switch_to_static_graph +def save(layer, model_path, input_spec=None, configs=None): + """ + Saves the input declarative Layer as a :ref:`api_imperative_TranslatedLayer` + format model, which can be used for inference or fine-tuning after loading. + + It will save the translated program and all related persistable + variables of the input declarative Layer to the given ``model_path``. + + The default saved translated program file name is ``__model__``, + the default saved persistable variables file name is ``__variables__``, + and some additional variable description information is also saved to the file + ``__variables.info__``; this additional information is used in fine-tuning. + + The saved model can be loaded by the following APIs: + - :ref:`api_imperative_jit_load` + - :ref:`api_fluid_io_load_inference_model` (``params_filename='__variables__'`` needs to be passed) + - Other C++ inference APIs + + Args: + layer (Layer): the Layer to be saved. The Layer should be decorated by `@declarative`. + model_path (str): the directory to save the model. + input_spec (list[Variable], optional): Describes the input of the saved model. + These are the example inputs that will be passed to the saved TranslatedLayer's forward + function. If None, all input variables of the original Layer's forward function + would be the inputs of the saved model. Default None. + configs (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig` object + that specifies additional configuration options. Default None. + Returns: + None + + Examples: + ..
code-block:: python + + import numpy as np + import paddle.fluid as fluid + from paddle.fluid.dygraph import Linear + from paddle.fluid.dygraph import declarative + + BATCH_SIZE = 32 + BATCH_NUM = 20 + + def random_batch_reader(): + def _get_random_images_and_labels(image_shape, label_shape): + image = np.random.random(size=image_shape).astype('float32') + label = np.random.random(size=label_shape).astype('int64') + return image, label + + def __reader__(): + for _ in range(BATCH_NUM): + batch_image, batch_label = _get_random_images_and_labels( + [BATCH_SIZE, 784], [BATCH_SIZE, 1]) + yield batch_image, batch_label + + return __reader__ + + class LinearNet(fluid.dygraph.Layer): + def __init__(self, in_size, out_size): + super(LinearNet, self).__init__() + self._linear = Linear(in_size, out_size) + + @declarative + def forward(self, x): + return self._linear(x) + + # enable dygraph mode + fluid.enable_dygraph() + + # create network + net = LinearNet(784, 1) + adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters()) + # create data loader + train_loader = fluid.io.DataLoader.from_generator(capacity=5) + train_loader.set_batch_generator(random_batch_reader()) + # train + for data in train_loader(): + img, label = data + label.stop_gradient = True + + cost = net(img) + + loss = fluid.layers.cross_entropy(cost, label) + avg_loss = fluid.layers.mean(loss) + + avg_loss.backward() + adam.minimize(avg_loss) + net.clear_gradients() + + # save model + model_path = "linear.example.model" + fluid.dygraph.jit.save( + layer=net, + model_path=model_path, + input_spec=[img]) + """ + + def get_inout_spec(all_vars, target_vars, return_name=False): + valid_vars = [var for var in all_vars if isinstance(var, Variable)] + valid_var_dict = {} + for var in valid_vars: + valid_var_dict[var.name] = var + if target_vars: + for i, var in enumerate(target_vars): + # check whether the target var exists + if var.name not in valid_var_dict: + raise RuntimeError( + "The variable to feed/fetch does not exist.") + target_vars[i] = valid_var_dict[var.name] + else: + target_vars = valid_vars + if return_name: + target_vars = [var.name for var in target_vars] + + return target_vars + + # 1. input check + prog_translator = ProgramTranslator() + if not prog_translator.enable: + raise RuntimeError( + "paddle.imperative.jit.save doesn't work when ProgramTranslator.enable is set to False." + ) + if not isinstance(layer, Layer): + raise TypeError( + "The input layer of paddle.imperative.jit.save should be 'Layer', but received layer type is %s." + % type(layer)) + + if configs is None: + configs = SaveLoadConfig() + + if input_spec is not None: + if not isinstance(input_spec, list): + raise TypeError( + "The input input_spec should be 'list', but received input_spec's type is %s." + % type(input_spec)) + for var in input_spec: + if not isinstance(var, core.VarBase): + raise TypeError( + "The element in input_spec list should be 'Variable', but received element's type is %s." + % type(var)) + + # 2. get program of declarative Layer.forward + prog_cache = prog_translator.get_program_cache() + # make dummy args & kwargs, to get the expected FunctionSpec + layer_func = FunctionSpec(type(layer).forward, [layer], {}) + concrete_program, _ = prog_cache.get_program(layer_func) + + # 3.
share parameters from Layer to scope & record var info + scope = core.Scope() + state_dict = layer.state_dict() + extra_var_info = dict() + for structured_name, param_or_buffer in state_dict.items(): + # share to scope + param_or_buffer_tensor = scope.var(param_or_buffer.name).get_tensor() + src_tensor = param_or_buffer.value().get_tensor() + param_or_buffer_tensor._share_data_with(src_tensor) + # record var info + extra_info_dict = dict() + extra_info_dict['structured_name'] = structured_name + extra_info_dict['stop_gradient'] = param_or_buffer.stop_gradient + if isinstance(param_or_buffer, ParamBase): + extra_info_dict['trainable'] = param_or_buffer.trainable + extra_var_info[param_or_buffer.name] = extra_info_dict + + # 4. build input & output spec + input_var_names = get_inout_spec(concrete_program.inputs, input_spec, True) + output_vars = get_inout_spec(concrete_program.outputs, configs.output_spec) + + # 5. save inference model + from paddle.fluid.io import save_inference_model + + # VARIABLE_FILENAME keeps the naming style consistent with '__model__' + if configs.params_filename is None: + configs.params_filename = VARIABLE_FILENAME + + with scope_guard(scope): + save_inference_model( + dirname=model_path, + feeded_var_names=input_var_names, + target_vars=output_vars, + executor=Executor(_current_expected_place()), + main_program=concrete_program.main_program.clone(), + model_filename=configs.model_filename, + params_filename=None + if configs.separate_params else configs.params_filename, + export_for_deployment=configs._export_for_deployment, + program_only=configs._program_only) + + # NOTE: [ Save extra variable info ] + # save_inference_model will lose some important variable information, including: + # - Variable name and correspondence (when variables are saved as one file) + # - Variable.stop_gradient information + # - Which persistable variables are parameters and which are not + # - Parameter.trainable information + # + # The lost information cannot be recovered when it is loaded again, + # so if we want to perform fine-tuning after loading, we may need to + # configure redundant information to proceed. + # + # Due to compatibility issues, we cannot change the original storage structure, + # but we can save this information in `jit.save` without changing the original + # storage to improve user experience. So we save the extra information into + # the file `__variables.info__` + extra_var_info_path = os.path.join(model_path, EXTRA_VAR_INFO_FILENAME) + with open(extra_var_info_path, 'wb') as f: + pickle.dump(extra_var_info, f, protocol=2) + + +@dygraph_only +def load(model_path, configs=None): + """ + :api_attr: imperative + + Load a model saved by :ref:`api_imperative_jit_save` or :ref:`api_fluid_io_save_inference_model` + as a :ref:`api_imperative_TranslatedLayer`, and then perform inference or fine-tune training. + + .. note:: + For some historical reasons, if you load a model saved by :ref:`api_fluid_io_save_inference_model`, + there will be the following limitations when using it in fine-tuning: + 1. Imperative mode does not support LoDTensor. All the original model's feed targets or parameters that depend on LoD are temporarily unavailable. + 2. All saved model's feed targets need to be passed into TranslatedLayer's forward function. + 3. The variable's ``stop_gradient`` information is lost and can not be recovered. + 4. The parameter's ``trainable`` information is lost and can not be recovered. + + Args: + model_path (str): The directory path where the model is saved.
+ configs (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig` object that specifies + additional configuration options. Default None. + + Returns: + TranslatedLayer: A Layer object that can run the saved translated model. + + Examples: + 1. Load the model saved by :ref:`api_imperative_jit_save`, then perform inference and fine-tune training. + + .. code-block:: python + + import numpy as np + import paddle.fluid as fluid + from paddle.fluid.dygraph import Linear + from paddle.fluid.dygraph import declarative + + BATCH_SIZE = 32 + BATCH_NUM = 20 + + def random_batch_reader(): + def _get_random_images_and_labels(image_shape, label_shape): + image = np.random.random(size=image_shape).astype('float32') + label = np.random.random(size=label_shape).astype('int64') + return image, label + + def __reader__(): + for _ in range(BATCH_NUM): + batch_image, batch_label = _get_random_images_and_labels( + [BATCH_SIZE, 784], [BATCH_SIZE, 1]) + yield batch_image, batch_label + + return __reader__ + + class LinearNet(fluid.dygraph.Layer): + def __init__(self, in_size, out_size): + super(LinearNet, self).__init__() + self._linear = Linear(in_size, out_size) + + @declarative + def forward(self, x): + return self._linear(x) + + # enable dygraph mode + fluid.enable_dygraph() + + # 1. train & save model. + # create network + net = LinearNet(784, 1) + adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters()) + # create data loader + train_loader = fluid.io.DataLoader.from_generator(capacity=5) + train_loader.set_batch_generator(random_batch_reader()) + # train + for data in train_loader(): + img, label = data + label.stop_gradient = True + + cost = net(img) + + loss = fluid.layers.cross_entropy(cost, label) + avg_loss = fluid.layers.mean(loss) + + avg_loss.backward() + adam.minimize(avg_loss) + net.clear_gradients() + + model_path = "linear.example.model" + fluid.dygraph.jit.save( + layer=net, + model_path=model_path, + input_spec=[img]) + + # 2. load model & inference + # load model + infer_net = fluid.dygraph.jit.load(model_path) + # inference + x = fluid.dygraph.to_variable(np.random.random((1, 784)).astype('float32')) + pred = infer_net(x) + + # 3. load model & fine-tune + # load model + train_net = fluid.dygraph.jit.load(model_path) + train_net.train() + adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=train_net.parameters()) + # create data loader + train_loader = fluid.io.DataLoader.from_generator(capacity=5) + train_loader.set_batch_generator(random_batch_reader()) + # fine-tune + for data in train_loader(): + img, label = data + label.stop_gradient = True + + cost = train_net(img) + + loss = fluid.layers.cross_entropy(cost, label) + avg_loss = fluid.layers.mean(loss) + + avg_loss.backward() + adam.minimize(avg_loss) + train_net.clear_gradients() + + 2. Load the model saved by :ref:`api_fluid_io_save_inference_model`, then perform inference and fine-tune training. + + ..
code-block:: python + + import numpy as np + import paddle.fluid as fluid + + BATCH_SIZE = 32 + BATCH_NUM = 20 + + def random_batch_reader(): + def _get_random_images_and_labels(image_shape, label_shape): + image = np.random.random(size=image_shape).astype('float32') + label = np.random.random(size=label_shape).astype('int64') + return image, label + + def __reader__(): + for _ in range(BATCH_NUM): + batch_image, batch_label = _get_random_images_and_labels( + [BATCH_SIZE, 784], [BATCH_SIZE, 1]) + yield batch_image, batch_label + + return __reader__ + + img = fluid.data(name='img', shape=[None, 784], dtype='float32') + label = fluid.data(name='label', shape=[None, 1], dtype='int64') + pred = fluid.layers.fc(input=img, size=10, act='softmax') + loss = fluid.layers.cross_entropy(input=pred, label=label) + avg_loss = fluid.layers.mean(loss) + + optimizer = fluid.optimizer.SGD(learning_rate=0.001) + optimizer.minimize(avg_loss) + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + + loader = fluid.io.DataLoader.from_generator( + feed_list=[img, label], capacity=5, iterable=True) + loader.set_batch_generator(random_batch_reader(), places=place) + + # 1. train and save inference model + for data in loader(): + exe.run( + fluid.default_main_program(), + feed=data, + fetch_list=[avg_loss]) + + model_path = "fc.example.model" + fluid.io.save_inference_model( + model_path, ["img"], [pred], exe) + + # enable dygraph mode + fluid.enable_dygraph() + + # 2. load model & inference + fc = fluid.dygraph.jit.load(model_path) + x = fluid.dygraph.to_variable(np.random.random((1, 784)).astype('float32')) + pred = fc(x) + + # 3. load model & fine-tune + fc = fluid.dygraph.jit.load(model_path) + fc.train() + sgd = fluid.optimizer.SGD(learning_rate=0.001, + parameter_list=fc.parameters()) + + train_loader = fluid.io.DataLoader.from_generator(capacity=5) + train_loader.set_batch_generator( + random_batch_reader(), places=place) + + for data in train_loader(): + img, label = data + label.stop_gradient = True + + cost = fc(img) + + loss = fluid.layers.cross_entropy(cost, label) + avg_loss = fluid.layers.mean(loss) + + avg_loss.backward() + sgd.minimize(avg_loss) + """ + return TranslatedLayer._construct(model_path, configs) + + @dygraph_only def _trace(layer, inputs, diff --git a/python/paddle/fluid/dygraph/static_runner.py b/python/paddle/fluid/dygraph/static_runner.py index 98960d3707fc0b00d47f3dcfda0231ac56c68706..d482077cd4f2aa5bf1cc30e4c71eac6e9bb7752f 100644 --- a/python/paddle/fluid/dygraph/static_runner.py +++ b/python/paddle/fluid/dygraph/static_runner.py @@ -14,550 +14,26 @@ from __future__ import print_function -import logging -import numpy as np -import os -import six +from paddle.fluid.dygraph.jit import SaveLoadConfig +from paddle.fluid.dygraph.io import TranslatedLayer -from . import layers -from .. import core -from .. import framework -from .. import backward -from ..layers import nn -from .base import switch_to_static_graph -from ... import compat as cpt - -# DESIGN IDEA: Add an special operator, execute static program inside operator. -# -# Op's Inputs: -# - the input variable of the user feed -# - the necessary parameters of the network -# Op's Outputs: -# - the output variable of fetch -# -# This op receives a complete program desc, internally creates scope -# and executor, executes this program. Key points: -# -# 1. 
diff --git a/python/paddle/fluid/dygraph/static_runner.py b/python/paddle/fluid/dygraph/static_runner.py
index 98960d3707fc0b00d47f3dcfda0231ac56c68706..d482077cd4f2aa5bf1cc30e4c71eac6e9bb7752f 100644
--- a/python/paddle/fluid/dygraph/static_runner.py
+++ b/python/paddle/fluid/dygraph/static_runner.py
@@ -14,550 +14,26 @@
 from __future__ import print_function
 
-import logging
-import numpy as np
-import os
-import six
+from paddle.fluid.dygraph.jit import SaveLoadConfig
+from paddle.fluid.dygraph.io import TranslatedLayer
 
-from . import layers
-from .. import core
-from .. import framework
-from .. import backward
-from ..layers import nn
-from .base import switch_to_static_graph
-from ... import compat as cpt
-
-# DESIGN IDEA: Add an special operator, execute static program inside operator.
-#
-# Op's Inputs:
-#   - the input variable of the user feed
-#   - the necessary parameters of the network
-# Op's Outputs:
-#   - the output variable of fetch
-#
-# This op receives a complete program desc, internally creates scope
-# and executor, executes this program. Key points:
-#
-# 1. Data Sharing:
-#   The varBase of the dynamic graph is not in the scope, so before the op
-#   executes the program internally, create persistent variables with the
-#   same name as feed, parameters, and fetch in the scope, and share the
-#   LoDTensor of the op input.
-#
-# 2. Forward and Backward Separation:
-#   Because the dynamic graph op performs the forward and backward separately,
-#   the forward program is used as the execution object of the forward op,
-#   and the reverse program is used as the execution object of the grad op.
-
-
-class StaticModelRunner(layers.Layer):
+
+# NOTE: This class will be deprecated later.
+# It is kept here because PaddleHub is already using this API.
+class StaticModelRunner(object):
     """
     A Dynamic graph Layer for loading inference program and related parameters,
     and then performing fine-tune training or inference.
 
-    The loaded program and parameters are saved by `fluid.io.save_inference_model`.
-
-    .. note::
-        **1. Dynamic graph mode do not support LoDTensor.
-        All original static graph model's feed targets or parametars
-        that depend on LoD are temporarily unavailable.**
-        **2. All saved inference model's feed targets need be given.**
-        **3. The ``stop_gradient`` information is lost and can not be recovered.**
-        **4. The parameter's ``trainable`` information is lost and can not be recovered.**
-        **5. Double gradient model is not supported now.**
-        **6. Now only supports loading models saved by `fluid.io.save_inference_model`.**
-
-    Args:
-        model_dir(str): The directory path where the model is saved.
-        model_filename(str, optional): The file name of saved inference program.
-                                       If set to None, a default filename is
-                                       :code:`__model__`.
-                                       The default value is None.
-        params_filename(str, optional): The file name of saved all related parameters.
-                                        If set to None, parameters are saved
-                                        in separate files.
-                                        The default value is None.
-
-    Returns:
-        Layer: A Layer object can run loaded program.
-
-    Examples:
-      .. code-block:: python
-
-        import numpy as np
-        import paddle.fluid as fluid
-
-        BATCH_SIZE = 32
-        BATCH_NUM = 20
-        SAVE_DIRNAME = "fc.inference.model"
-
-        def random_batch_reader():
-            def _get_random_images_and_labels(image_shape, label_shape):
-                image = np.random.random(size=image_shape).astype('float32')
-                label = np.random.random(size=label_shape).astype('int64')
-                return image, label
-
-            def __reader__():
-                for _ in range(BATCH_NUM):
-                    batch_image, batch_label = _get_random_images_and_labels(
-                        [BATCH_SIZE, 784], [BATCH_SIZE, 1])
-                    yield batch_image, batch_label
-
-            return __reader__
-
-        def train_and_save_static_model(place):
-            img = fluid.data(name='img', shape=[None, 784], dtype='float32')
-            label = fluid.data(name='label', shape=[None, 1], dtype='int64')
-
-            pred = fluid.layers.fc(input=img, size=10, act='softmax')
-
-            loss = fluid.layers.cross_entropy(input=pred, label=label)
-            avg_loss = fluid.layers.mean(loss)
-
-            optimizer = fluid.optimizer.SGD(learning_rate=0.001)
-            optimizer.minimize(avg_loss)
-
-            exe = fluid.Executor(place)
-            exe.run(fluid.default_startup_program())
-
-            loader = fluid.io.DataLoader.from_generator(
-                feed_list=[img, label], capacity=5, iterable=True)
-            loader.set_batch_generator(random_batch_reader(), places=place)
-
-            for data in loader():
-                exe.run(
-                    fluid.default_main_program(),
-                    feed=data,
-                    fetch_list=[avg_loss])
-
-            # save model by fluid.io.save_inference_model
-            fluid.io.save_inference_model(
-                SAVE_DIRNAME, ["img"], [pred], exe)
-
-
-        # Step 1. train and save inference model in static graph mode
-        place = fluid.CPUPlace()
-        train_and_save_static_model(place)
-
-        # Step 2. load inference model in dygraph and fine-tune
-        with fluid.dygraph.guard(place):
-            fc = fluid.dygraph.static_runner.StaticModelRunner(SAVE_DIRNAME)
-
-            sgd = fluid.optimizer.SGD(learning_rate=0.001,
-                                      parameter_list=fc.parameters())
-
-            train_loader = fluid.io.DataLoader.from_generator(capacity=5)
-            train_loader.set_batch_generator(
-                random_batch_reader(), places=place)
-
-            for data in train_loader():
-                img = data[0]
-                label = data[1]
-                label.stop_gradient = True
-
-                cost = fc(inputs=img)
-
-                loss = fluid.layers.cross_entropy(cost, label)
-                avg_loss = fluid.layers.mean(loss)
-
-                avg_loss.backward()
-                sgd.minimize(avg_loss)
+
+    This is a temporary API which will be deprecated later; please use
+    `fluid.dygraph.jit.load` to achieve the same function.
     """
 
-    def __init__(self, model_dir, model_filename=None, params_filename=None):
-        super(StaticModelRunner, self).__init__()
-
-        # Step 0. key variable definitions
-        # loaded inference program desc
-        self._infer_program_desc = None
-        # recovered train program desc
-        self._train_program_desc = None
-        # StaticModelRunner executed program desc,
-        # switch infer or train by train() and eval()
-        self._trace_program_desc = None
-        self._inner_scope = core.Scope()
-        # the layer outputs var desc
-        self._output_descs = []
-        # input, output, params name list
-        self._input_names = []
-        self._output_names = []
-        self._param_names = []
-        # train or eval flag
-        self._is_test = False
-
-        # Step 1. load program desc from disk
-        # the saved model hold feed, fetch & scale op, no need, can be remove
-        self._infer_program_desc = self._load_static_model(model_dir,
-                                                           model_filename)
-
-        # Step 2. load all parameters
-        self._load_persisitable_dict(model_dir, params_filename)
-
-        # Step 3. generate backwar program desc
-        self._train_program_desc = self._append_backward_desc()
-
-        # Step 4. recheck parameters stop gradients
-        self._recheck_stop_gradients()
-
-        # Step 5. set default mode to train
-        self.train()
-
-    def train(self):
-        self._is_test = False
-        self._trace_program_desc = self._train_program_desc
-
-    def eval(self):
-        self._is_test = True
-        self._trace_program_desc = self._infer_program_desc
-
-    def forward(self, *args):
-        """
-        Executed forward part of StaticModelRunner Layer.
-        Generally execute directly using the Layer object.
-
-        Args:
-            args(tuple(np.ndarray|Variable)): the inputs of StaticModelRunner.
-                The order of input variables needs to be the same as the order
-                of feed variables when using `save_inference_model` to save model.
-
-        Returns:
-            Variable|list[Variable]: The forward outputs of StaticModelRunner Layer.
-                If there is only one output, return Variable;
-                if there are multiple outputs, return list[Variable].
-        """
-        # Step 1. prepare inputs, outputs, attrs
-        input_vars = []
-        for i, value in enumerate(args):
-            if not isinstance(value, (np.ndarray, core.VarBase)):
-                raise TypeError(
-                    "The type of inputs.value in StaticModelRunner.forward must be numpy array or Variable(VarBase), but received %s."
-                    % type(value))
-            # NOTE: In order to unify the API, firstly convert the input to VarBase
-            if isinstance(value, np.ndarray):
-                var = core.VarBase(
-                    value=value,
-                    name=self._input_names[i],
-                    persistable=False,
-                    place=framework._current_expected_place(),
-                    zero_copy=True)
-            else:
-                var = value
-            # TODO: here may have important name set by user
-            var.name = self._input_names[i]
-            input_vars.append(var)
-
-        params = []
-        for param in self._parameters.values():
-            params.append(param)
-
-        output_vars = []
-        for var_desc in self._output_descs:
-            var = core.VarBase(var_desc.dtype(),
-                               var_desc.shape(),
-                               var_desc.name(), var_desc.type(), False)
-            output_vars.append(var)
-
-        # hold forward variables
-        tmp_scope_vec = core.VarBase(core.VarDesc.VarType.FP32, [],
-                                     "program_out_scope",
-                                     core.VarDesc.VarType.STEP_SCOPES, True)
-        tmp_scope_vec.value().set_scope(self._inner_scope)
-
-        # Step 2. run prorgam by op
-        framework._dygraph_tracer().trace_op(
-            type='run_program',
-            inputs={'X': input_vars,
-                    'Params': params},
-            outputs={'Out': output_vars,
-                     'OutScope': tmp_scope_vec},
-            attrs={
-                'global_block': self._trace_program_desc.block(0),
-                'start_op_index': 0,
-                'end_op_index': self._infer_program_desc.block(0).op_size(),
-                'is_test': self._is_test
-            })
-
-        # NOTE: [ why need set param's gradient type here ]
-        # if user set sparse gradient mode, the param's gradient
-        # will be SelectedRows, not LoDTensor. But tracer will just
-        # set param grad VarBase by forward VarBase(LoDTensor)
-        # If we don't change grad_var type here, RunProgramOp need
-        # transform SelectedRows to LoDTensor forcely, it may not
-        # be user wanted result.
-        for param in params:
-            grad_name = param.name + core.grad_var_suffix()
-            grad_var = self._trace_program_desc.block(0).find_var(
-                cpt.to_bytes(grad_name))
-            # NOTE: cannot find var desc maybe no problem, such as in batch_norm
-            if grad_var is None:
-                continue
-            param._set_grad_type(grad_var.type())
-
-        # Step 3. prepare output, keep same form with inputs
-        outs = output_vars
-        if len(output_vars) == 1:
-            outs = output_vars[0]
-        return outs
-
-    def _load_static_model(self, model_dir, model_filename=None):
-        # Step 1. dir and filename check
-        load_dirname = os.path.normpath(model_dir)
-        if not os.path.isdir(load_dirname):
-            raise ValueError("There is no directory named '%s'" % load_dirname)
-
+    def __new__(cls, model_dir, model_filename=None, params_filename=None):
+        configs = SaveLoadConfig()
         if model_filename is not None:
-            model_filename = os.path.basename(model_filename)
-        else:
-            model_filename = "__model__"
-        model_filename = os.path.join(load_dirname, model_filename)
-
-        # Step 2. parse program desc
-        with open(model_filename, "rb") as f:
-            program_desc_str = f.read()
-
-        program_desc = core.ProgramDesc(program_desc_str)
-        if not core._is_program_version_supported(program_desc._version()):
-            raise ValueError("Unsupported program version: %d\n" %
-                             program_desc._version())
-
-        # Step 3.
-        # - remove feed, fetch and useless scale-1 op
-        # - remove op_callstack attr
-        ops_to_remove = []
-        root_block = program_desc.block(0)
-        for i in six.moves.range(root_block.op_size()):
-            op = root_block.op(i)
-            if op.type() == 'feed':
-                ops_to_remove.append(i)
-                feed_var_name = cpt.to_bytes(op.input('X')[0])
-                root_block._remove_var(feed_var_name)
-                self._input_names.append(cpt.to_bytes(op.output('Out')[0]))
-            elif op.type() == 'scale' and op.output('Out')[0].startswith(
-                    'save_infer_model/scale_'):
-                ops_to_remove.append(i)
-                out_var_name = cpt.to_bytes(op.output('Out')[0])
-                root_block._remove_var(out_var_name)
-                self._output_names.append(cpt.to_bytes(op.input('X')[0]))
-                self._output_descs.append(
-                    root_block.find_var(cpt.to_bytes(op.input('X')[0])))
-            elif op.type() == 'fetch':
-                ops_to_remove.append(i)
-                fetch_var_name = cpt.to_bytes(op.output('Out')[0])
-                root_block._remove_var(fetch_var_name)
-                # NOTE: some old pre-train models have no extra scale_op
-                if not op.input('X')[0].startswith('save_infer_model/scale_'):
-                    self._output_names.append(cpt.to_bytes(op.input('X')[0]))
-                    self._output_descs.append(
-                        root_block.find_var(cpt.to_bytes(op.input('X')[0])))
-            else:
-                if op.has_attr("op_callstack"):
-                    op.remove_attr("op_callstack")
-
-        for op_idx in reversed(ops_to_remove):
-            root_block._remove_op(op_idx, op_idx + 1)
-
-        # NOTE: reverse feed vars
-        self._input_names.reverse()
-
-        # Step 4. add scale for outputs
-        tmp_program = self._build_program_by_desc(program_desc)
-        self._append_scale_to_output(tmp_program)
-
-        return program_desc
-
-    @switch_to_static_graph
-    def _append_scale_to_output(self, program):
-        # 1. append scale & save var
-        scale_output_vars = []
-        with framework.program_guard(program):
-            for i, out in enumerate(self._output_descs):
-                var = program.global_block().var(out.name())
-                var = nn.scale(
-                    var, 1., name="static_model_runner/scale_{}".format(i))
-                scale_output_vars.append(var)
-        # 2. update output names & descs
-        for i, var in enumerate(scale_output_vars):
-            self._output_names[i] = var.name
-            self._output_descs[i] = var.desc
-
-    @switch_to_static_graph
-    def _append_backward_desc(self):
-        assert self._infer_program_desc is not None, "The StaticModelRunner not initialized properly."
-        program_desc_copy = core.ProgramDesc(self._infer_program_desc)
-
-        # Step 1. set all `is_test` attributes to False
-        self._change_is_test_status(program_desc_copy, False)
-
-        # Step 2. prepare program and related var
-        # NOTE: To reuse backward interfaces, build Program firstly.
-        # Originally, there is no need to build a program, but need to almost
-        # rewrite a series of methods for append_backward for program_desc.
-        # Therefore, in order to reuse the method of backward.py, build the program here.
-        fwd_op_num = program_desc_copy.block(0).op_size()
-        program = self._build_program_by_desc(program_desc_copy)
-
-        # TODO: could the targets be in sub block?
-        targets = []
-        for out in self._output_descs:
-            targets.append(program.global_block().var(out.name()))
-
-        # Step 3. append backward
-        backward.gradients(targets=targets, inputs=[])
-        return program.desc
-
-    def _load_persisitable_dict(self, model_dir, params_filename=None):
-        load_dirname = os.path.normpath(model_dir)
-        assert self._infer_program_desc is not None, "The StaticModelRunner not initialized properly."
-
-        persis_vars = self._get_persis_vars(self._infer_program_desc)
-        load_var_map = {}
-        for each_var in persis_vars:
-            orig_each_name = each_var.name()
-            # append suffix
-            self._append_loaded_suffix_to_param(each_var)
-            # create output varbase
-            new_var = framework.ParamBase(
-                shape=each_var.shape(),
-                dtype=each_var.dtype(),
-                name=each_var.name(),
-                type=each_var.type(),
-                persistable=True)
-            if params_filename is None:
-                if not self._is_parameter(each_var):
-                    continue
-                framework._dygraph_tracer().trace_op(
-                    type='load',
-                    inputs={},
-                    outputs={'Out': new_var},
-                    attrs={
-                        'file_path': os.path.join(load_dirname, orig_each_name)
-                    })
-                new_var.stop_gradient = False
-                self.add_parameter(name=new_var.name, parameter=new_var)
-                self._param_names.append(new_var.name)
-            else:
-                load_var_map[each_var.name()] = new_var
-
+            configs.model_filename = model_filename
         if params_filename is not None:
-            load_var_list = []
-            for name in sorted(load_var_map.keys()):
-                load_var_list.append(load_var_map[name])
-
-            framework._dygraph_tracer().trace_op(
-                type='load_combine',
-                inputs={},
-                outputs={'Out': load_var_list},
-                attrs={
-                    'file_path': os.path.join(load_dirname, params_filename)
-                })
-
-            for each_var in persis_vars:
-                if not self._is_parameter(each_var):
-                    continue
-                param = load_var_map[each_var.name()]
-                param.stop_gradient = False
-                self.add_parameter(name=param.name, parameter=param)
-                self._param_names.append(param.name)
-
-    def _recheck_stop_gradients(self):
-        assert self._train_program_desc is not None, "The StaticModelRunner not initialized properly."
-        # NOTE: After loading the model, the stop_gradient information
-        # of the original variable is lost, but if a parameter does not
-        # have a corresponding @GRAD variable in the backward program,
-        # it can be said that it is also stop_gradient
-        all_var_names = self._get_all_var_names(self._train_program_desc)
-        for param_name in self._parameters:
-            param_grad_name = param_name + core.grad_var_suffix()
-            if param_grad_name not in all_var_names:
-                self._parameters[param_name].stop_gradient = True
-
-    def _get_all_var_names(self, program_desc):
-        all_var_names = set()
-        for i in six.moves.range(program_desc.num_blocks()):
-            block = program_desc.block(i)
-            for var in block.all_vars():
-                all_var_names.add(var.name())
-        return all_var_names
-
-    def _get_persis_vars(self, program_desc):
-        persis_vars = []
-        for i in six.moves.range(program_desc.num_blocks()):
-            block = program_desc.block(i)
-            persis_vars.extend(
-                list(filter(self._is_persistable, block.all_vars())))
-        return persis_vars
-
-    @switch_to_static_graph
-    def _build_program_by_desc(self, program_desc):
-        prog = framework.Program()
-        prog.desc = program_desc
-        prog.blocks = [
-            framework.Block(prog, i)
-            for i in six.moves.range(prog.desc.num_blocks())
-        ]
-        prog._sync_with_cpp()
-        return prog
-
-    def _is_persistable(self, var_desc):
-        if var_desc.type() == core.VarDesc.VarType.FEED_MINIBATCH or \
-                var_desc.type() == core.VarDesc.VarType.FETCH_LIST or \
-                var_desc.type() == core.VarDesc.VarType.READER or \
-                var_desc.type() == core.VarDesc.VarType.RAW:
-            return False
-        return var_desc.persistable()
-
-    def _is_parameter(self, persis_var_desc):
-        assert self._infer_program_desc is not None, "The StaticModelRunner not initialized properly."
-        # 1. firstly, param should be input of op
-        input_ops = []  # op can be repeated
-        for block_idx in six.moves.range(self._infer_program_desc.num_blocks()):
-            block = self._infer_program_desc.block(block_idx)
-            for op_idx in six.moves.range(block.op_size()):
-                op = block.op(op_idx)
-                # NOTE: parameter is the input of a certain op
-                if persis_var_desc.name() in op.input_arg_names():
-                    input_ops.append(op)
-        # 2. secondly, param should not be output of op or be same op's output
-        for block_idx in six.moves.range(self._infer_program_desc.num_blocks()):
-            block = self._infer_program_desc.block(block_idx)
-            for op_idx in six.moves.range(block.op_size()):
-                op = block.op(op_idx)
-                if persis_var_desc.name() in op.output_arg_names():
-                    # such as batch_norm_op
-                    if op in input_ops:
-                        continue
-                    else:
-                        return False
-        return True
-
-    def _change_is_test_status(self, program_desc, is_test):
-        # change all `is_test` attributes
-        for i in six.moves.range(program_desc.num_blocks()):
-            block = program_desc.block(i)
-            for j in six.moves.range(block.op_size()):
-                op = block.op(j)
-                if op.has_attr('is_test'):
-                    op._set_attr('is_test', is_test)
-
-    def _append_loaded_suffix(self, name):
-        """
-        Append grad suffix to the given variable name
-        e.g. x ==> x@LOADED
-        """
-        suffix = core.loaded_var_suffix()
-        name = cpt.to_text(name)
-        if suffix not in name:
-            name = name + suffix
-        return name
-
-    def _append_loaded_suffix_to_param(self, param_desc):
-        old_name = param_desc.name()
-        new_name = self._append_loaded_suffix(param_desc.name())
-        param_desc.set_name(new_name)
-        for block_idx in six.moves.range(self._infer_program_desc.num_blocks()):
-            block = self._infer_program_desc.block(block_idx)
-            for op_idx in six.moves.range(block.op_size()):
-                op = block.op(op_idx)
-                op._rename_input(old_name, new_name)
-                op._rename_output(old_name, new_name)
+            configs.params_filename = params_filename
+        return TranslatedLayer._construct(model_dir, configs)
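With this hunk, `StaticModelRunner` is reduced to a thin compatibility shim: `__new__` builds a `SaveLoadConfig` and returns the `TranslatedLayer` produced by `TranslatedLayer._construct`, so it no longer parses program descs itself. A minimal sketch of the resulting equivalence (the directory is illustrative and assumed to have been produced by `fluid.io.save_inference_model` with a combined `__params__` file):

    import paddle.fluid as fluid
    from paddle.fluid.dygraph.static_runner import StaticModelRunner

    fluid.enable_dygraph()

    # old entry point, kept only for PaddleHub compatibility
    fc_old = StaticModelRunner("fc.inference.model", params_filename="__params__")

    # equivalent new entry point
    configs = fluid.dygraph.jit.SaveLoadConfig()
    configs.params_filename = "__params__"
    fc_new = fluid.dygraph.jit.load("fc.inference.model", configs=configs)

    # both calls yield a TranslatedLayer instance
    assert type(fc_old) is type(fc_new)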
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py
index 71aed18c105b858b9defb70e0daf46af5f6ce804..0541c37fc71b08cde1f71fee72365d4535596a6f 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py
@@ -26,6 +26,7 @@ from paddle.fluid.dygraph import to_variable
 from paddle.fluid.dygraph.nn import Conv2D, Linear, Pool2D
 from paddle.fluid.optimizer import AdamOptimizer
 from paddle.fluid.dygraph.jit import declarative
+from paddle.fluid.dygraph.io import VARIABLE_FILENAME
 from paddle.fluid.dygraph.dygraph_to_static import ProgramTranslator
 
 SEED = 2020
@@ -201,6 +202,9 @@ class TestMNISTWithDeclarative(TestMNIST):
                     self.check_save_inference_model([dy_x_data, y_data],
                                                     prog_trans, to_static,
                                                     prediction)
+                    # new save load check
+                    self.check_jit_save_load(mnist, [dy_x_data], [img],
+                                             to_static, prediction)
                     break
         return loss_data
 
@@ -224,6 +228,45 @@ class TestMNISTWithDeclarative(TestMNIST):
 
         return np.array(results[0])
 
+    def check_jit_save_load(self, model, inputs, input_spec, to_static, gt_out):
+        if to_static:
+            infer_model_path = "./test_mnist_inference_model_by_jit_save"
+            configs = fluid.dygraph.jit.SaveLoadConfig()
+            configs.output_spec = [gt_out]
+            fluid.dygraph.jit.save(
+                layer=model,
+                model_path=infer_model_path,
+                input_spec=input_spec,
+                configs=configs)
+            # load in static mode
+            static_infer_out = self.jit_load_and_run_inference_static(
+                infer_model_path, inputs)
+            self.assertTrue(np.allclose(gt_out.numpy(), static_infer_out))
+            # load in dygraph mode
+            dygraph_infer_out = self.jit_load_and_run_inference_dygraph(
+                infer_model_path, inputs)
+            self.assertTrue(np.allclose(gt_out.numpy(), dygraph_infer_out))
+
+    @switch_to_static_graph
+    def jit_load_and_run_inference_static(self, model_path, inputs):
+        exe = fluid.Executor(self.place)
+        [inference_program, feed_target_names,
+         fetch_targets] = fluid.io.load_inference_model(
+             dirname=model_path,
+             executor=exe,
+             params_filename=VARIABLE_FILENAME)
+        assert len(inputs) == len(feed_target_names)
+        results = exe.run(inference_program,
+                          feed=dict(zip(feed_target_names, inputs)),
+                          fetch_list=fetch_targets)
+
+        return np.array(results[0])
+
+    def jit_load_and_run_inference_dygraph(self, model_path, inputs):
+        infer_net = fluid.dygraph.jit.load(model_path)
+        pred = infer_net(inputs[0])
+        return pred.numpy()
+
 
 if __name__ == "__main__":
     unittest.main()
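The MNIST test above checks that a model written by `jit.save` can be read back on both execution paths. Distilled out of the test harness, the static-mode interop looks roughly like this sketch (the directory is illustrative; `VARIABLE_FILENAME` is the combined-parameters file name defined by the new io module):

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.dygraph.io import VARIABLE_FILENAME

    # a directory produced earlier by fluid.dygraph.jit.save (illustrative)
    model_path = "./test_mnist_inference_model_by_jit_save"

    exe = fluid.Executor(fluid.CPUPlace())
    # jit.save combines all parameters into one file, so load_inference_model
    # must be pointed at it via params_filename
    [program, feed_names, fetch_targets] = fluid.io.load_inference_model(
        dirname=model_path, executor=exe, params_filename=VARIABLE_FILENAME)

    x = np.random.random((1, 784)).astype('float32')
    out = exe.run(program, feed={feed_names[0]: x}, fetch_list=fetch_targets)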
diff --git a/python/paddle/fluid/tests/unittests/test_jit_save_load.py b/python/paddle/fluid/tests/unittests/test_jit_save_load.py
new file mode 100644
index 0000000000000000000000000000000000000000..640e966354b44b733f67f71e11f79472c184a9ea
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_jit_save_load.py
@@ -0,0 +1,234 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.dygraph import Linear
+from paddle.fluid.dygraph import declarative
+
+BATCH_SIZE = 32
+BATCH_NUM = 20
+SEED = 10
+
+
+def random_batch_reader():
+    def _get_random_images_and_labels(image_shape, label_shape):
+        np.random.seed(SEED)
+        image = np.random.random(size=image_shape).astype('float32')
+        label = np.random.random(size=label_shape).astype('int64')
+        return image, label
+
+    def __reader__():
+        for _ in range(BATCH_NUM):
+            batch_image, batch_label = _get_random_images_and_labels(
+                [BATCH_SIZE, 784], [BATCH_SIZE, 1])
+            yield batch_image, batch_label
+
+    return __reader__
+
+
+class LinearNet(fluid.dygraph.Layer):
+    def __init__(self, in_size, out_size):
+        super(LinearNet, self).__init__()
+        self._linear = Linear(in_size, out_size)
+
+    @declarative
+    def forward(self, x):
+        return self._linear(x)
+
+
+class LinearNetNotDeclarative(fluid.dygraph.Layer):
+    def __init__(self, in_size, out_size):
+        super(LinearNetNotDeclarative, self).__init__()
+        self._linear = Linear(in_size, out_size)
+
+    def forward(self, x):
+        return self._linear(x)
+
+
+class LinearNetReturnLoss(fluid.dygraph.Layer):
+    def __init__(self, in_size, out_size):
+        super(LinearNetReturnLoss, self).__init__()
+        self._linear = Linear(in_size, out_size)
+
+    @declarative
+    def forward(self, x):
+        y = self._linear(x)
+        z = self._linear(y)
+        loss = fluid.layers.mean(z)
+        return z, loss
+
+
+def train(layer):
+    # create optimizer
+    adam = fluid.optimizer.AdamOptimizer(
+        learning_rate=0.1, parameter_list=layer.parameters())
+    # create data loader
+    train_loader = fluid.io.DataLoader.from_generator(capacity=5)
+    train_loader.set_batch_generator(random_batch_reader())
+    # train
+    for data in train_loader():
+        img, label = data
+        label.stop_gradient = True
+
+        cost = layer(img)
+
+        loss = fluid.layers.cross_entropy(cost, label)
+        avg_loss = fluid.layers.mean(loss)
+
+        avg_loss.backward()
+        adam.minimize(avg_loss)
+        layer.clear_gradients()
+    return [img], layer, avg_loss
+
+
+def infer(layer):
+    x = fluid.dygraph.to_variable(np.random.random((1, 784)).astype('float32'))
+    return layer(x)
+
+
+class TestJitSaveLoad(unittest.TestCase):
+    def setUp(self):
+        self.model_path = "model.test_jit_save_load"
+        # enable dygraph mode
+        fluid.enable_dygraph()
+        # config seed
+        fluid.default_main_program().random_seed = SEED
+
+    def train_and_save_model(self):
+        layer = LinearNet(784, 1)
+        example_inputs, layer, _ = train(layer)
+        fluid.dygraph.jit.save(
+            layer=layer, model_path=self.model_path, input_spec=example_inputs)
+        return layer
+
+    def test_save(self):
+        # train and save model
+        self.train_and_save_model()
+
+    def test_load_inference(self):
+        # train and save model
+        train_layer = self.train_and_save_model()
+        # load model
+        infer_layer = fluid.dygraph.jit.load(self.model_path)
+        train_layer.eval()
+        # inference & compare
+        x = fluid.dygraph.to_variable(
+            np.random.random((1, 784)).astype('float32'))
+        self.assertTrue(
+            np.array_equal(train_layer(x).numpy(), infer_layer(x).numpy()))
+
+    def test_load_finetune(self):
+        # train and save model
+        train_layer = self.train_and_save_model()
+        # load model
+        load_train_layer = fluid.dygraph.jit.load(self.model_path)
+        load_train_layer.train()
+        # train & compare
+        _, _, train_loss = train(train_layer)
+        _, _, load_train_loss = train(load_train_layer)
+        self.assertTrue(
+            np.array_equal(train_loss.numpy(), load_train_loss.numpy()))
+
+    def test_save_get_program_failed(self):
+        layer = LinearNetNotDeclarative(784, 1)
+        example_inputs, layer, _ = train(layer)
+        with self.assertRaises(RuntimeError):
+            fluid.dygraph.jit.save(
+                layer=layer,
+                model_path=self.model_path,
+                input_spec=example_inputs)
+
+
+class TestJitSaveLoadConfig(unittest.TestCase):
+    def setUp(self):
+        # enable dygraph mode
+        fluid.enable_dygraph()
+        # config seed
+        fluid.default_main_program().random_seed = SEED
+
+    def basic_save_load(self, layer, model_path, configs):
+        # 1. train & save
+        example_inputs, train_layer, _ = train(layer)
+        fluid.dygraph.jit.save(
+            layer=train_layer,
+            model_path=model_path,
+            input_spec=example_inputs,
+            configs=configs)
+        # 2. load
+        infer_layer = fluid.dygraph.jit.load(model_path, configs=configs)
+        train_layer.eval()
+        # 3. inference & compare
+        x = fluid.dygraph.to_variable(
+            np.random.random((1, 784)).astype('float32'))
+        self.assertTrue(
+            np.array_equal(train_layer(x).numpy(), infer_layer(x).numpy()))
+
+    def test_model_filename(self):
+        layer = LinearNet(784, 1)
+        model_path = "model.save_load_config.model_filename"
+        configs = fluid.dygraph.jit.SaveLoadConfig()
+        configs.model_filename = "__simplenet__"
+        self.basic_save_load(layer, model_path, configs)
+
+    def test_params_filename(self):
+        layer = LinearNet(784, 1)
+        model_path = "model.save_load_config.params_filename"
+        configs = fluid.dygraph.jit.SaveLoadConfig()
+        configs.params_filename = "__params__"
+        self.basic_save_load(layer, model_path, configs)
+
+    def test_separate_params(self):
+        layer = LinearNet(784, 1)
+        model_path = "model.save_load_config.separate_params"
+        configs = fluid.dygraph.jit.SaveLoadConfig()
+        configs.separate_params = True
+        self.basic_save_load(layer, model_path, configs)
+
+    def test_output_spec(self):
+        train_layer = LinearNetReturnLoss(8, 8)
+        adam = fluid.optimizer.AdamOptimizer(
+            learning_rate=0.1, parameter_list=train_layer.parameters())
+        x = fluid.dygraph.to_variable(
+            np.random.random((4, 8)).astype('float32'))
+        for i in range(10):
+            out, loss = train_layer(x)
+            loss.backward()
+            adam.minimize(loss)
+            train_layer.clear_gradients()
+
+        model_path = "model.save_load_config.output_spec"
+        configs = fluid.dygraph.jit.SaveLoadConfig()
+        configs.output_spec = [out]
+        fluid.dygraph.jit.save(
+            layer=train_layer,
+            model_path=model_path,
+            input_spec=[x],
+            configs=configs)
+
+        train_layer.eval()
+        infer_layer = fluid.dygraph.jit.load(model_path, configs=configs)
+        x = fluid.dygraph.to_variable(
+            np.random.random((4, 8)).astype('float32'))
+        self.assertTrue(
+            np.array_equal(train_layer(x)[0].numpy(), infer_layer(x).numpy()))
+
+
+if __name__ == '__main__':
+    unittest.main()
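The `output_spec` option exercised by `test_output_spec` above prunes a multi-output `forward` down to the outputs actually needed for inference. A minimal self-contained sketch of that flow (the class `SmallNet` and the model path are illustrative, not part of the patch):

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.dygraph import Linear, declarative

    class SmallNet(fluid.dygraph.Layer):
        def __init__(self):
            super(SmallNet, self).__init__()
            self._linear = Linear(8, 8)

        @declarative
        def forward(self, x):
            z = self._linear(x)
            loss = fluid.layers.mean(z)
            return z, loss  # two outputs: prediction and training loss

    fluid.enable_dygraph()
    net = SmallNet()
    x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
    out, loss = net(x)  # trace the program once before saving

    # keep only `out` in the saved inference program; `loss` is pruned away
    configs = fluid.dygraph.jit.SaveLoadConfig()
    configs.output_spec = [out]
    fluid.dygraph.jit.save(
        layer=net, model_path="output_spec.example.model",
        input_spec=[x], configs=configs)

    infer_net = fluid.dygraph.jit.load("output_spec.example.model", configs=configs)
    pred = infer_net(x)  # a single pruned output rather than a (z, loss) pair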
diff --git a/python/paddle/imperative/__init__.py b/python/paddle/imperative/__init__.py
index 79e9c57befed77d4f2ad5210c3f81cf1943226f3..489888a2fef39b2cca5b918a412d231784471ddc 100644
--- a/python/paddle/imperative/__init__.py
+++ b/python/paddle/imperative/__init__.py
@@ -16,7 +16,7 @@
 __all__ = [
     'BackwardStrategy', 'enabled', 'grad', 'guard', 'LayerList', 'load',
     'save', 'prepare_context', 'to_variable', 'TracedLayer', 'no_grad', 'ParallelEnv',
-    'ProgramTranslator', 'declarative', 'DataParallel'
+    'ProgramTranslator', 'declarative', 'DataParallel', 'TranslatedLayer', 'jit'
 ]
 
 __all__ += [
@@ -31,6 +31,7 @@ from ..fluid.dygraph.checkpoint import save_dygraph as save
 from ..fluid.dygraph.parallel import prepare_context, ParallelEnv, DataParallel
 from ..fluid.dygraph.jit import TracedLayer, declarative
 from ..fluid.dygraph import ProgramTranslator
+from . import jit
 from ..fluid.dygraph.learning_rate_scheduler import NoamDecay, PiecewiseDecay, NaturalExpDecay, ExponentialDecay, \
     InverseTimeDecay, PolynomialDecay, CosineDecay
diff --git a/python/paddle/imperative/jit/__init__.py b/python/paddle/imperative/jit/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..85fccf6e689ebf606092df8c3f94f561a68705ed
--- /dev/null
+++ b/python/paddle/imperative/jit/__init__.py
@@ -0,0 +1,18 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ...fluid.dygraph.jit import save, load, SaveLoadConfig
+from ...fluid.dygraph.io import TranslatedLayer
+
+__all__ = ['save', 'load', 'SaveLoadConfig']
diff --git a/python/setup.py.in b/python/setup.py.in
index 67db20ce14bc6b864352a87920ca2f6e121d2edf..31a089d727276e80e78f3d025a01c2ab75e4a0a1 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -202,6 +202,7 @@ packages=['paddle',
           'paddle.nn.initializer',
           'paddle.metric',
           'paddle.imperative',
+          'paddle.imperative.jit',
+          'paddle.tensor',
          ]
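With `paddle.imperative.jit` registered as a package, the new save/load pair becomes reachable from the `paddle.imperative` namespace as plain re-exports of the fluid implementations. A minimal sketch, assuming `paddle.imperative` is importable as wired up in the `__init__` hunks above (the layer and paths are illustrative):

    import numpy as np
    import paddle
    import paddle.fluid as fluid
    from paddle.fluid.dygraph import Linear, declarative

    class LinearNet(fluid.dygraph.Layer):
        def __init__(self):
            super(LinearNet, self).__init__()
            self._linear = Linear(784, 1)

        @declarative
        def forward(self, x):
            return self._linear(x)

    fluid.enable_dygraph()
    net = LinearNet()
    x = fluid.dygraph.to_variable(np.random.random((1, 784)).astype('float32'))
    net(x)  # run once so the declarative program is traced

    # same objects as fluid.dygraph.jit.save / load / SaveLoadConfig
    paddle.imperative.jit.save(layer=net, model_path="imperative.jit.example",
                               input_spec=[x])
    loaded = paddle.imperative.jit.load("imperative.jit.example")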