Unverified commit 41d22472 authored by Chen Weihang, committed by GitHub

[Dy2static] Refactor ProgramTranslator save_inference_model API (#24989)

* experimental refactoring, test=develop

* add TranslatedLayer & remove StaticModelRunner, test=develop

* revert tracedlayer change, test=develop

* fix test_mnist unittest error, test=develop

* add doc & examples, test=develop

* polish doc details, test=develop

* add imperative.jit module, test=develop

* change TranslatedLayer pos, test=develop

* adjust jit module import path, test=develop

* polish doc based on review result

* add SaveLoadConfig.separate_params to save params separately

* add Layer.buffer support, test=develop

* polish doc details based on review result, test=develop

* polish details based on review comments, test=develop

* add empty str check for param, test=develop

* add unittests, test=develop

* polish details based on review comment, test=develop

* remove blanks in comment, test=develop

* polish doc details, test=develop

* update imperative doc link, test=develop

* add api attr for load, test=develop
Parent 43f9f180
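In short, this change replaces the StaticModelRunner entry point with the `fluid.dygraph.jit.save` / `fluid.dygraph.jit.load` pair built around the new TranslatedLayer. A minimal sketch of the new workflow, condensed from the docstring examples added in this PR (`net`, `img`, and `x` stand in for the layer and data defined in those examples):

    import paddle.fluid as fluid

    # enable dygraph mode
    fluid.enable_dygraph()

    # `net` is a Layer whose forward is decorated with @declarative;
    # after training, save it as a TranslatedLayer-format model
    fluid.dygraph.jit.save(layer=net, model_path="linear.example.model", input_spec=[img])

    # load the model back as a TranslatedLayer and run inference
    translated_layer = fluid.dygraph.jit.load("linear.example.model")
    translated_layer.eval()
    pred = translated_layer(x)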
......@@ -49,13 +49,13 @@ static void CheckInputVarStatus(const Variable &var,
var.IsType<LoDTensor>(), true,
platform::errors::InvalidArgument(
"The input variable %s of "
"RunProgram(Grad)Op(StaticModelRunner) holds "
"RunProgram(Grad)Op holds "
"wrong type. Expect type is LoDTensor, but receive type is %s.",
var_name, platform::demangle(framework::ToTypeName(var.Type()))));
PADDLE_ENFORCE_EQ(
var.Get<LoDTensor>().IsInitialized(), true,
platform::errors::InvalidArgument("The tensor in input variable %s of "
"RunProgram(Grad)Op(StaticModelRunner) "
"RunProgram(Grad)Op "
"is not initialized.",
var_name));
}
......@@ -68,14 +68,14 @@ static void CheckOutputVarStatus(const Variable &src_var,
src_var.IsType<LoDTensor>(), true,
platform::errors::InvalidArgument(
"The output variable %s get from "
"RunProgram(Grad)Op(StaticModelRunner)'s internal scope holds "
"RunProgram(Grad)Op's internal scope holds "
"wrong type. Expect type is LoDTensor, but receive type is %s.",
var_name,
platform::demangle(framework::ToTypeName(src_var.Type()))));
PADDLE_ENFORCE_EQ(src_var.Get<LoDTensor>().IsInitialized(), true,
platform::errors::InvalidArgument(
"The tensor in output variable %s get from "
"RunProgram(Grad)Op(StaticModelRunner)'s internal "
"RunProgram(Grad)Op's internal "
"scope is not initialized.",
var_name));
} else if (dst_var.IsType<SelectedRows>()) {
......@@ -83,20 +83,20 @@ static void CheckOutputVarStatus(const Variable &src_var,
src_var.IsType<SelectedRows>(), true,
platform::errors::InvalidArgument(
"The output variable %s get from "
"RunProgram(Grad)Op(StaticModelRunner)'s internal scope holds "
"RunProgram(Grad)Op's internal scope holds "
"wrong type. Expect type is SelectedRows, but receive type is %s.",
var_name,
platform::demangle(framework::ToTypeName(src_var.Type()))));
PADDLE_ENFORCE_EQ(src_var.Get<SelectedRows>().value().IsInitialized(), true,
platform::errors::InvalidArgument(
"The tensor in output variable %s get from "
"RunProgram(Grad)Op(StaticModelRunner)'s "
"RunProgram(Grad)Op's "
"internal scope is not initialized.",
var_name));
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"The RunProgram(Grad)Op(StaticModelRunner) only support output "
"The RunProgram(Grad)Op only support output "
"variable of type LoDTensor or SelectedRows, "
"but received variable %s's type is %s",
var_name, platform::demangle(framework::ToTypeName(dst_var.Type()))));
......@@ -143,7 +143,7 @@ static void ShareVarsFromScope(const std::vector<Variable *> &vars,
auto *var = scope->FindVar(var_names[i]);
PADDLE_ENFORCE_NOT_NULL(
var, platform::errors::NotFound("The output variable %s is not in "
"RunProgram(Grad)Op(StaticModelRunner)'"
"RunProgram(Grad)Op'"
"s internal scope.",
var_names[i]));
CheckOutputVarStatus(*var, *vars[i], var_names[i]);
......
......@@ -44,6 +44,9 @@ from .backward_strategy import *
from . import jit
from .jit import *
from . import io
from .io import *
from . import static_runner
from .static_runner import StaticModelRunner
......@@ -63,5 +66,6 @@ __all__ += checkpoint.__all__
__all__ += learning_rate_scheduler.__all__
__all__ += backward_strategy.__all__
__all__ += jit.__all__
__all__ += io.__all__
__all__ += rnn.__all__
__all__ += ['ProgramTranslator']
......@@ -36,6 +36,7 @@ from paddle.fluid.wrapped_decorator import signature_safe_contextmanager
from paddle.fluid.dygraph.base import param_guard
from paddle.fluid.data_feeder import check_type
from paddle.fluid.dygraph.dygraph_to_static.partial_program import partial_program_from
from paddle.fluid.annotations import deprecated
__all__ = ['ProgramTranslator', 'convert_to_static']
......@@ -125,6 +126,9 @@ class FunctionSpec(object):
self._args = args
self._kwargs = kwargs
dyfunc = getattr(func, '__wrapped__', func)
self._dyfunc_code = inspect.getsource(dyfunc)
def is_method(self):
return self._args and isinstance(self._args[0], layers.Layer)
......@@ -198,7 +202,9 @@ class FunctionSpec(object):
# Note: if dygraph function is a method of class,
# consider instance info as hash key.
if self.is_method():
return self._dyfunc, self._args[0]
# NOTE: we can use Layer's (instance + function code) as hash key.
# An instance will not hold two identical methods
return self._dyfunc_code, self._args[0]
else:
return self._dyfunc
......@@ -312,6 +318,17 @@ class ProgramCache(object):
self._caches[item] = self._build_once(item)
return self._caches[item]
def get_program(self, item):
if not isinstance(item, FunctionSpec):
raise ValueError(
"Input item's type should be FunctionSpec, but received %s" %
type(item))
if item not in self._caches:
raise RuntimeError(
"Failed to find program for input item, please decorate input function by `@declarative`."
)
return self._caches[item]
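# Usage sketch (mirrors `jit.save` in jit.py below): `save` builds a dummy
# FunctionSpec for the decorated forward and fetches its cached program:
#
#   layer_func = FunctionSpec(type(layer).forward, [layer], {})
#   concrete_program, _ = prog_cache.get_program(layer_func)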
def last(self):
assert len(
self._caches) >= 1, "No valid cached program in ProgramCache."
......@@ -633,6 +650,7 @@ class ProgramTranslator(object):
source_code = ast_to_source_code(root_wrapper.node)
return source_code
@deprecated(since='2.0', instead="paddle.imperative.jit.save")
@switch_to_static_graph
def save_inference_model(self, dirname, feed=None, fetch=None):
"""
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import six
import pickle
import numpy as np
from paddle import compat as cpt
from paddle.fluid import core
from paddle.fluid import framework
from paddle.fluid import backward
from paddle.fluid.dygraph import layers
from paddle.fluid.layers import nn
from paddle.fluid.dygraph.base import switch_to_static_graph
__all__ = ['TranslatedLayer']
VARIABLE_FILENAME = "__variables__"
EXTRA_VAR_INFO_FILENAME = "__variables.info__"
def _load_program_desc(model_file_path):
# 1. parse program desc
with open(model_file_path, "rb") as f:
program_desc_str = f.read()
program_desc = core.ProgramDesc(program_desc_str)
if not core._is_program_version_supported(program_desc._version()):
raise ValueError("Unsupported program version: %d\n" %
program_desc._version())
return program_desc
def _is_persistable(var_desc):
if var_desc.type() == core.VarDesc.VarType.FEED_MINIBATCH or \
var_desc.type() == core.VarDesc.VarType.FETCH_LIST or \
var_desc.type() == core.VarDesc.VarType.READER or \
var_desc.type() == core.VarDesc.VarType.RAW:
return False
return var_desc.persistable()
def _is_parameter(persistable_var_desc, program_desc):
# 1. firstly, param should be input of op
input_ops = [] # op can be repeated
for block_idx in six.moves.range(program_desc.num_blocks()):
block = program_desc.block(block_idx)
for op_idx in six.moves.range(block.op_size()):
op = block.op(op_idx)
# NOTE: parameter is the input of a certain op
if persistable_var_desc.name() in op.input_arg_names():
input_ops.append(op)
# 2. secondly, param should not be the output of an op, unless that op also takes it as input (e.g. batch_norm)
for block_idx in six.moves.range(program_desc.num_blocks()):
block = program_desc.block(block_idx)
for op_idx in six.moves.range(block.op_size()):
op = block.op(op_idx)
if persistable_var_desc.name() in op.output_arg_names():
# such as batch_norm_op
if op in input_ops:
continue
else:
return False
return True
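# For example, batch_norm's saved mean/variance appear as both an op's
# input and output, so they still count as parameters here, while a
# persistable variable that only appears as an op output (never as an
# input) is not treated as a parameter.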
def _get_persistable_vars(program_desc):
persistable_vars = []
for i in six.moves.range(program_desc.num_blocks()):
block = program_desc.block(i)
persistable_vars.extend(list(filter(_is_persistable, block.all_vars())))
return persistable_vars
def _get_persistable_var_names(program_desc):
"""
Get all persistable variable names in ProgramDesc.
"""
var_names = []
persistable_vars = _get_persistable_vars(program_desc)
for var in persistable_vars:
var_names.append(var.name())
return var_names
def _get_all_var_names(program_desc):
all_var_names = set()
for i in six.moves.range(program_desc.num_blocks()):
block = program_desc.block(i)
for var in block.all_vars():
all_var_names.add(var.name())
return all_var_names
def _append_loaded_suffix(name):
"""
Append loaded suffix to the given variable name
e.g. x ==> x@LOADED
"""
suffix = core.loaded_var_suffix()
name = cpt.to_text(name)
if suffix not in name:
name = name + suffix
return name
def _remove_loaded_suffix(name):
"""
Remove loaded suffix from the given variable name
e.g. x@LOADED ==> x
"""
suffix = core.loaded_var_suffix()
name = cpt.to_text(name)
return name.replace(suffix, '')
def _append_loaded_suffix_to_var(program_desc):
persistable_vars = _get_persistable_vars(program_desc)
for var_desc in persistable_vars:
old_name = var_desc.name()
new_name = _append_loaded_suffix(var_desc.name())
var_desc.set_name(new_name)
for block_idx in six.moves.range(program_desc.num_blocks()):
block = program_desc.block(block_idx)
for op_idx in six.moves.range(block.op_size()):
op = block.op(op_idx)
op._rename_input(old_name, new_name)
op._rename_output(old_name, new_name)
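# For example, a persistable variable named `linear_0.w_0` is renamed to
# `linear_0.w_0@LOADED`, and every op input/output that refers to it is
# renamed accordingly (see [why need to append suffix to persistable vars]
# in _ProgramHolder._preprocess below).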
@switch_to_static_graph
def _build_program_by_desc(program_desc):
prog = framework.Program()
prog.desc = program_desc
prog.blocks = [
framework.Block(prog, i)
for i in six.moves.range(prog.desc.num_blocks())
]
prog._sync_with_cpp()
return prog
def _change_is_test_status(program_desc, is_test):
# change all `is_test` attributes
for i in six.moves.range(program_desc.num_blocks()):
block = program_desc.block(i)
for j in six.moves.range(block.op_size()):
op = block.op(j)
if op.has_attr('is_test'):
op._set_attr('is_test', is_test)
class _ProgramHolder(object):
"""
Holds the execution information of a Program.
_ProgramHolder is the execution unit of TranslatedLayer. If a
TranslatedLayer contains multiple _ProgramHolders, it can execute
multiple methods. _ProgramHolder is an internal concept.
"""
def __init__(self, program_desc):
super(_ProgramHolder, self).__init__()
# input, output, persistable var info
self._input_names = []
self._persistable_names = []
self._output_descs = []
# execution scope
self._inner_scope = core.Scope()
# forward program
self._infer_program_desc = self._preprocess(program_desc)
# forward + backward program
self._train_program_desc = self._append_backward_desc(
self._infer_program_desc)
@property
def infer_program(self):
return self._infer_program_desc
@property
def train_program(self):
return self._train_program_desc
@property
def input_names(self):
return self._input_names
@property
def output_decs(self):
return self._output_descs
@property
def persistable_names(self):
return self._persistable_names
@property
def scope(self):
return self._inner_scope
def _preprocess(self, program_desc):
# 1. Prune original program
# remove feed, fetch and scale-1 op, remove op_callstack attr
ops_to_remove = []
root_block = program_desc.block(0)
for i in six.moves.range(root_block.op_size()):
op = root_block.op(i)
if op.type() == 'feed':
ops_to_remove.append(i)
feed_var_name = cpt.to_bytes(op.input('X')[0])
root_block._remove_var(feed_var_name)
self._input_names.append(cpt.to_bytes(op.output('Out')[0]))
elif op.type() == 'scale' and op.output('Out')[0].startswith(
'save_infer_model/scale_'):
ops_to_remove.append(i)
out_var_name = cpt.to_bytes(op.output('Out')[0])
root_block._remove_var(out_var_name)
self._output_descs.append(
root_block.find_var(cpt.to_bytes(op.input('X')[0])))
elif op.type() == 'fetch':
ops_to_remove.append(i)
fetch_var_name = cpt.to_bytes(op.output('Out')[0])
root_block._remove_var(fetch_var_name)
# NOTE: some old pre-train models have no extra scale_op
if not op.input('X')[0].startswith('save_infer_model/scale_'):
self._output_descs.append(
root_block.find_var(cpt.to_bytes(op.input('X')[0])))
else:
if op.has_attr("op_callstack"):
op.remove_attr("op_callstack")
for op_idx in reversed(ops_to_remove):
root_block._remove_op(op_idx, op_idx + 1)
# 2. Input processing, reverse feed vars
self._input_names.reverse()
# 3. Output processing, add scale for outputs
tmp_program = _build_program_by_desc(program_desc)
# NOTE: [why need append scale for outputs]
# When dealing with some more complex pre-trained models, the model
# may have multiple fetch outputs. In that scenario there is a special
# case where several outputs lie on the same branch, and according to
# the user's subsequent use they may be associated with several
# branches. TranslatedLayer is agnostic to these subsequent operations
# at initialization time, so the gradient accumulation operations
# required on an output node in the middle of a branch would not be
# performed, resulting in errors. For details see pull request:
# [https://github.com/PaddlePaddle/Paddle/pull/24627]
self._append_scale_to_output(tmp_program)
# 4. Persistable vars processing
# - append @LOADED suffix to persistable vars
# NOTE: [why need to append suffix to persistable vars]
# Dygraph and static graph mode use the same naming mechanism.
# If users load a model for fine-tuning, they may add existing
# Layers to the loaded model to enhance the network. For example,
# the original saved model has a linear layer, and a new linear
# layer is added after loading. This causes duplicate-name problems,
# so we uniformly append the @LOADED suffix to the parameters of a
# model loaded during training. To avoid appending the @LOADED
# suffix multiple times, we only append it to variable names that
# do not already contain it.
_append_loaded_suffix_to_var(program_desc)
# - get persistable var
self._persistable_names = _get_persistable_var_names(program_desc)
return program_desc
@switch_to_static_graph
def _append_scale_to_output(self, program):
# 1. append scale & save var
scale_output_vars = []
with framework.program_guard(program):
for i, out in enumerate(self._output_descs):
var = program.global_block().var(out.name())
var = nn.scale(
var, 1., name="static_model_runner/scale_{}".format(i))
scale_output_vars.append(var)
# 2. update output names & descs
for i, var in enumerate(scale_output_vars):
self._output_descs[i] = var.desc
@switch_to_static_graph
def _append_backward_desc(self, infer_program_desc):
program_desc_copy = core.ProgramDesc(infer_program_desc)
# 1. set all `is_test` attributes to False
_change_is_test_status(program_desc_copy, False)
# 2. prepare program and related var
# NOTE: To reuse backward interfaces, build Program firstly.
# Originally there is no need to build a program, but otherwise we
# would have to nearly rewrite the append_backward series of methods
# for program_desc. Therefore, to reuse the methods in backward.py,
# we build the program here.
program = _build_program_by_desc(program_desc_copy)
targets = []
for out in self._output_descs:
targets.append(program.global_block().var(out.name()))
# 3. append backward
backward.gradients(targets=targets, inputs=[])
return program.desc
# [ TranslatedLayer : Run program in imperative mode ]
#
# DESIGN IDEA: using a special operator `RunProgram`, execute the program inside the operator.
#
# Op's Inputs:
# - the input variable of the user feed
# - the necessary parameters of the network
# Op's Outputs:
# - the output variable of fetch
#
# This op receives a complete program desc, internally creates a scope
# and an executor, and executes the program. Key points:
#
# 1. Data Sharing:
# The VarBase of the dynamic graph is not in the scope, so before the op
# executes the program internally, create persistent variables with the
# same name as feed, parameters, and fetch in the scope, and share the
# LoDTensor of the op input.
#
# 2. Forward and Backward Separation:
# Because dynamic graph ops perform forward and backward separately,
# the forward op RunProgram executes only the forward part of the whole
# program, and the backward op RunProgramGrad executes the backward part.
# We cannot split the program into separate forward and backward parts,
# as that would make some control-flow execution logic wrong.
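# A condensed sketch of the resulting trace (mirrors the real call in
# TranslatedLayer._execution_method_creator below):
#
#   framework._dygraph_tracer().trace_op(
#       type='run_program',
#       inputs={'X': input_vars, 'Params': persistable_vars},
#       outputs={'Out': output_vars, 'OutScope': tmp_scope_vec},
#       attrs={'global_block': trace_program.block(0),
#              'start_op_index': 0,
#              'end_op_index': end_op_index,
#              'is_test': is_test})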
# NOTE: [compatible] deal with models saved by save_inference_model,
# which need to get var info from the program desc
def _load_persistable_vars_by_program(model_path,
program_holder,
params_filename=None):
# make sure the path has been checked
persistable_vars = _get_persistable_vars(program_holder.infer_program)
load_var_dict = {}
for each_var in persistable_vars:
orig_each_name = _remove_loaded_suffix(each_var.name())
if _is_parameter(each_var, program_holder.infer_program):
# create output varbase
new_var = framework.ParamBase(
shape=each_var.shape(),
dtype=each_var.dtype(),
name=each_var.name(),
type=each_var.type(),
persistable=True)
else:
new_var = framework._varbase_creator(
type=each_var.type(),
name=each_var.name(),
shape=each_var.shape(),
dtype=each_var.dtype(),
persistable=True)
if params_filename is None:
framework._dygraph_tracer().trace_op(
type='load',
inputs={},
outputs={'Out': new_var},
attrs={'file_path': os.path.join(model_path, orig_each_name)})
new_var.stop_gradient = False
load_var_dict[each_var.name()] = new_var
if params_filename is not None:
load_var_list = []
for name in sorted(load_var_dict.keys()):
load_var_list.append(load_var_dict[name])
framework._dygraph_tracer().trace_op(
type='load_combine',
inputs={},
outputs={'Out': load_var_list},
attrs={'file_path': os.path.join(model_path, params_filename)})
for each_var in persistable_vars:
if not _is_parameter(each_var, program_holder.infer_program):
continue
param = load_var_dict[each_var.name()]
param.stop_gradient = False
# NOTE: [Recovery stop gradient information based on the program]
# After loading the model, the stop_gradient information
# of the original variable is lost, but if a parameter does not
# have a corresponding @GRAD variable in the backward program,
# we can infer that it is also stop_gradient
all_var_names = _get_all_var_names(program_holder.train_program)
for var_name in load_var_dict:
grad_var_name = var_name + core.grad_var_suffix()
if grad_var_name not in all_var_names:
load_var_dict[var_name].stop_gradient = True
return load_var_dict
def _load_persistable_vars(model_path,
var_info_path,
separate_params=False,
params_filename=None):
# 1. load extra var info
with open(var_info_path, 'rb') as f:
extra_var_info = pickle.load(f) if six.PY2 else pickle.load(
f, encoding='latin1')
# 2. construct var dict
load_var_dict = dict()
load_var_list = []
# NOTE: some var may not be Parameter
for name in sorted(extra_var_info):
# append suffix, see [why need to append suffix to persistable vars]
new_name = _append_loaded_suffix(name)
# create output varbase
if extra_var_info[name].get('trainable', None) is not None:
# use default shape and dtype
new_var = framework.ParamBase(
shape=[1], # only to pass check, this shape is not meaningful
dtype=core.VarDesc.VarType.FP32,
name=new_name,
persistable=True)
else:
new_var = framework._varbase_creator(
name=new_name, persistable=True)
# load separate vars
if separate_params is True:
framework._dygraph_tracer().trace_op(
type='load',
inputs={},
outputs={'Out': new_var},
attrs={'file_path': os.path.join(model_path, name)})
new_var.stop_gradient = extra_var_info[name]['stop_gradient']
load_var_dict[new_name] = new_var
load_var_list.append(new_var)
# 3. load all vars
if separate_params is False:
if params_filename is not None:
var_file_path = os.path.join(model_path, params_filename)
else:
var_file_path = os.path.join(model_path, VARIABLE_FILENAME)
framework._dygraph_tracer().trace_op(
type='load_combine',
inputs={},
outputs={'Out': load_var_list},
attrs={'file_path': var_file_path})
return load_var_dict
def _construct_program_holders(model_path, model_filename=None):
# make sure the path has been checked
program_holder_dict = dict()
if model_filename is not None:
# [compatible] if model_filename is specified, only one program can be loaded, as Layer.forward
model_filename = os.path.basename(model_filename)
model_file_path = os.path.join(model_path, model_filename)
program_holder_dict['forward'] = _ProgramHolder(
_load_program_desc(model_file_path))
else:
for _, _, file_names in os.walk(model_path):
for name in file_names:
if 'model' in name:
model_file_path = os.path.join(model_path, name)
method_name = name.strip('_')
if method_name == 'model':
method_name = 'forward'
else:
method_name = method_name.replace('model', '')
program_holder_dict[method_name] = _ProgramHolder(
_load_program_desc(model_file_path))
return program_holder_dict
def _construct_params_and_buffers(model_path,
programs,
separate_params=False,
params_filename=None):
var_info_path = os.path.join(model_path, EXTRA_VAR_INFO_FILENAME)
if os.path.exists(var_info_path):
var_dict = _load_persistable_vars(model_path, var_info_path,
separate_params, params_filename)
else:
var_dict = _load_persistable_vars_by_program(
model_path, programs['forward'], params_filename)
return var_dict
class TranslatedLayer(layers.Layer):
"""
TranslatedLayer is an imperative Layer for holding the model loaded by
:ref:`api_imperative_jit_load` . It can be used like a general Layer
object in eval or train mode.
.. note::
TranslatedLayer objects should not be created by the constructor; they can only be loaded and constructed by :ref:`api_imperative_jit_load` .
Examples:
.. code-block:: python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph import declarative
BATCH_SIZE = 32
BATCH_NUM = 20
def random_batch_reader():
def _get_random_images_and_labels(image_shape, label_shape):
image = np.random.random(size=image_shape).astype('float32')
label = np.random.random(size=label_shape).astype('int64')
return image, label
def __reader__():
for _ in range(BATCH_NUM):
batch_image, batch_label = _get_random_images_and_labels(
[BATCH_SIZE, 784], [BATCH_SIZE, 1])
yield batch_image, batch_label
return __reader__
class LinearNet(fluid.dygraph.Layer):
def __init__(self, in_size, out_size):
super(LinearNet, self).__init__()
self._linear = Linear(in_size, out_size)
@declarative
def forward(self, x):
return self._linear(x)
# enable dygraph mode
fluid.enable_dygraph()
# 1. train & save model.
# create network
net = LinearNet(784, 1)
adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters())
# create data loader
train_loader = fluid.io.DataLoader.from_generator(capacity=5)
train_loader.set_batch_generator(random_batch_reader())
# train
for data in train_loader():
img, label = data
label.stop_gradient = True
cost = net(img)
loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
adam.minimize(avg_loss)
net.clear_gradients()
model_path = "linear.example.model"
fluid.dygraph.jit.save(
layer=net,
model_path=model_path,
input_spec=[img])
# 2. load model as TranslatedLayer
translated_layer = fluid.dygraph.jit.load(model_path)
# inference
translated_layer.eval()
x = fluid.dygraph.to_variable(np.random.random((1, 784)).astype('float32'))
pred = translated_layer(x)
# fine-tune
translated_layer.train()
adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=translated_layer.parameters())
train_loader = fluid.io.DataLoader.from_generator(capacity=5)
train_loader.set_batch_generator(random_batch_reader())
for data in train_loader():
img, label = data
label.stop_gradient = True
cost = translated_layer(img)
loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
adam.minimize(avg_loss)
translated_layer.clear_gradients()
"""
def __init__(self, programs, persistable_vars):
super(TranslatedLayer, self).__init__()
if not isinstance(programs, dict):
raise TypeError(
"TranslatedLayer need to use _ProgramHolder's dict for initialization."
)
if not isinstance(persistable_vars, dict):
raise TypeError(
"TranslatedLayer need to use persisatbale variable dict for initialization."
)
self._program_holder_dict = programs
for name, var in persistable_vars.items():
if isinstance(var, framework.ParamBase):
self.add_parameter(name, var)
elif isinstance(var, core.VarBase):
self.register_buffer(name, var)
else:
raise TypeError(
"Adding persistent variable which to layer is not supported now"
)
self._is_test = True
@staticmethod
@framework.dygraph_only
def _construct(model_path, configs=None):
# 0. dir and filename check
model_path = os.path.normpath(model_path)
if not os.path.isdir(model_path):
raise ValueError("There is no directory named '%s'" % model_path)
model_filename = None
params_filename = None
separate_params = False
if configs is not None:
model_filename = configs.model_filename
params_filename = configs.params_filename
separate_params = configs.separate_params
# 1. load program desc & construct _ProgramHolder
programs = _construct_program_holders(model_path, model_filename)
# 2. load layer parameters & parameter attributes
persistable_vars = _construct_params_and_buffers(
model_path, programs, separate_params, params_filename)
# 3. construct TranslatedLayer object
translated_layer = TranslatedLayer(programs, persistable_vars)
# 4. create TranslatedLayer's execution method
for method_name, program_holder in programs.items():
setattr(TranslatedLayer, method_name,
TranslatedLayer._execution_method_creator(method_name,
program_holder))
# 5. set TranslatedLayer's default mode to eval
translated_layer.eval()
return translated_layer
@staticmethod
def _execution_method_creator(method_name, program_holder):
def __impl__(self, *input):
# 1. prepare inputs, outputs, attrs
input_vars = []
for i, value in enumerate(input):
if not isinstance(value, (np.ndarray, core.VarBase)):
raise TypeError(
"The type of input in TranslatedLayer must be numpy array or Variable(VarBase), but received %s."
% type(value))
# NOTE: In order to unify the API, firstly convert the input to VarBase
if isinstance(value, np.ndarray):
var = core.VarBase(
value=value,
name=program_holder.input_names[i],
persistable=False,
place=framework._current_expected_place(),
zero_copy=True)
else:
var = value
# NOTE: we changed var name here,
# but it may be an important name set by user
var.name = program_holder.input_names[i]
input_vars.append(var)
persistable_vars = []
for var_name in program_holder.persistable_names:
if var_name in self._parameters:
persistable_vars.append(self._parameters[var_name])
elif var_name in self._buffers:
persistable_vars.append(self._buffers[var_name])
else:
raise ValueError(
"The persistable variable %s is not exists in current TranslatedLayer."
% var_name)
output_vars = []
for var_desc in program_holder.output_decs:
var = core.VarBase(var_desc.dtype(),
var_desc.shape(),
var_desc.name(), var_desc.type(), False)
output_vars.append(var)
# hold forward variables
tmp_scope_vec = core.VarBase(core.VarDesc.VarType.FP32, [],
"program_out_scope",
core.VarDesc.VarType.STEP_SCOPES, True)
tmp_scope_vec.value().set_scope(program_holder.scope)
# 2. run program by op
trace_program = program_holder.infer_program if self._is_test else program_holder.train_program
end_op_index = program_holder.infer_program.block(0).op_size()
framework._dygraph_tracer().trace_op(
type='run_program',
inputs={'X': input_vars,
'Params': persistable_vars},
outputs={'Out': output_vars,
'OutScope': tmp_scope_vec},
attrs={
'global_block': trace_program.block(0),
'start_op_index': 0,
'end_op_index': end_op_index,
'is_test': self._is_test
})
# NOTE: [ why need set param's gradient type here ]
# if the user sets sparse gradient mode, the param's gradient
# will be SelectedRows, not LoDTensor. But the tracer will just
# set the param grad VarBase by the forward VarBase (LoDTensor).
# If we don't change the grad_var type here, RunProgramOp would
# need to forcibly transform SelectedRows to LoDTensor, which may
# not be the result the user wants.
for persistable_var in persistable_vars:
grad_var_name = persistable_var.name + core.grad_var_suffix()
grad_var = trace_program.block(0).find_var(
cpt.to_bytes(grad_var_name))
# NOTE: cannot find var desc maybe not problem,
# such as in batch_norm
if grad_var is None:
continue
persistable_var._set_grad_type(grad_var.type())
# 3. prepare output, keep same form with inputs
outs = output_vars
if len(output_vars) == 1:
outs = output_vars[0]
return outs
__impl__.__name__ = method_name
return __impl__
def train(self):
self._is_test = False
def eval(self):
self._is_test = True
......@@ -14,17 +14,22 @@
from __future__ import print_function
__all__ = ['TracedLayer', 'declarative', 'dygraph_to_static_func']
import os
import six
import pickle
import warnings
from paddle.fluid import core
from paddle.fluid.compiler import CompiledProgram
from paddle.fluid.dygraph.base import program_desc_tracing_guard, switch_to_static_graph
from paddle.fluid.dygraph.dygraph_to_static.program_translator import ProgramTranslator
from paddle.fluid.dygraph.dygraph_to_static.program_translator import ProgramTranslator, FunctionSpec
from paddle.fluid.dygraph.layers import Layer
from paddle.fluid.executor import Executor, scope_guard
from paddle.fluid.framework import Program, Block, Variable, _dygraph_tracer, dygraph_only, _dygraph_guard, _current_expected_place, in_dygraph_mode
from paddle.fluid.framework import Program, Block, Variable, ParamBase, _dygraph_tracer, dygraph_only, _dygraph_guard, _current_expected_place, in_dygraph_mode
from paddle.fluid.wrapped_decorator import wrap_decorator
from paddle.fluid.dygraph.io import TranslatedLayer, VARIABLE_FILENAME, EXTRA_VAR_INFO_FILENAME
__all__ = ['TracedLayer', 'declarative', 'dygraph_to_static_func']
def create_program_from_desc(program_desc):
......@@ -166,6 +171,802 @@ def _declarative_(dygraph_func):
declarative = wrap_decorator(_declarative_)
class SaveLoadConfig(object):
"""
The additional configuration options used in function
:ref:`api_imperative_jit_save` that saves :ref:`api_imperative_TranslatedLayer`
or in function :ref:`api_imperative_jit_load` that
loads :ref:`api_imperative_TranslatedLayer` .
Examples:
1. Using ``SaveLoadConfig`` when saving model
.. code-block:: python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph import declarative
class SimpleNet(fluid.dygraph.Layer):
def __init__(self, in_size, out_size):
super(SimpleNet, self).__init__()
self._linear = Linear(in_size, out_size)
@declarative
def forward(self, x):
y = self._linear(x)
z = self._linear(y)
return z
# enable dygraph mode
fluid.enable_dygraph()
# train model
net = SimpleNet(8, 8)
adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters())
x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
for i in range(10):
out = net(x)
loss = fluid.layers.mean(out)
loss.backward()
adam.minimize(loss)
net.clear_gradients()
# use SaveLoadConfig when saving model
model_path = "simplenet.example.model"
configs = fluid.dygraph.jit.SaveLoadConfig()
configs.model_filename = "__simplenet__"
fluid.dygraph.jit.save(
layer=net,
model_path=model_path,
input_spec=[x],
configs=configs)
2. Using ``SaveLoadConfig`` when loading model
.. code-block:: python
import numpy as np
import paddle.fluid as fluid
# enable dygraph mode
fluid.enable_dygraph()
# use SaveLoadConfig when loading model
model_path = "simplenet.example.model"
configs = fluid.dygraph.jit.SaveLoadConfig()
configs.model_filename = "__simplenet__"
infer_net = fluid.dygraph.jit.load(model_path, configs=configs)
# inference
x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
pred = infer_net(x)
"""
def __init__(self):
self._output_spec = None
self._model_filename = None
self._params_filename = None
self._separate_params = False
# NOTE: Users rarely use the following configs, so they are not exposed,
# reducing user learning costs, but we retain the configuration capabilities
# If True, programs are modified to only support direct inference deployment.
# Otherwise, more information will be stored for flexible optimization and re-training.
# Currently, only True is supported
self._export_for_deployment = True
# If True, only the inference program will be saved, without the params of the Program
self._program_only = False
@property
def output_spec(self):
"""
Selects the output targets of the saved model ( :ref:`api_imperative_TranslatedLayer` ).
By default, all return variables of the original Layer's forward function
are kept as the output of the saved TranslatedLayer.
The ``output_spec`` type should be list[Variable]. If the provided ``output_spec``
list does not contain all output variables, the saved model will be pruned
according to the given ``output_spec`` list.
.. note::
The ``output_spec`` is only used when saving model.
Examples:
.. code-block:: python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph import declarative
class SimpleNet(fluid.dygraph.Layer):
def __init__(self, in_size, out_size):
super(SimpleNet, self).__init__()
self._linear = Linear(in_size, out_size)
@declarative
def forward(self, x):
y = self._linear(x)
z = self._linear(y)
loss = fluid.layers.mean(z)
return z, loss
# enable dygraph mode
fluid.enable_dygraph()
# train model
net = SimpleNet(8, 8)
adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters())
x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
for i in range(10):
out, loss = net(x)
loss.backward()
adam.minimize(loss)
net.clear_gradients()
# use SaveLoadConfig.output_spec
model_path = "simplenet.example.model.output_spec"
configs = fluid.dygraph.jit.SaveLoadConfig()
# only keep the predicted output in saved model, discard loss
configs.output_spec = [out]
fluid.dygraph.jit.save(
layer=net,
model_path=model_path,
input_spec=[x],
configs=configs)
infer_net = fluid.dygraph.jit.load(model_path, configs=configs)
x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
# only have the predicted output
pred = infer_net(x)
"""
return self._output_spec
@output_spec.setter
def output_spec(self, spec):
if not isinstance(spec, list):
raise TypeError(
"The SaveLoadConfig.output_spec should be 'list', but received input type is %s."
% type(spec))
for var in spec:
if not isinstance(var, core.VarBase):
raise TypeError(
"The element in SaveLoadConfig.output_spec list should be 'Variable', but received element's type is %s."
% type(var))
self._output_spec = spec
@property
def model_filename(self):
"""
The name of the file to save the translated program of the target Layer.
Default filename is :code:`__model__` .
Examples:
.. code-block:: python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph import declarative
class SimpleNet(fluid.dygraph.Layer):
def __init__(self, in_size, out_size):
super(SimpleNet, self).__init__()
self._linear = Linear(in_size, out_size)
@declarative
def forward(self, x):
y = self._linear(x)
z = self._linear(y)
return z
# enable dygraph mode
fluid.enable_dygraph()
# train model
net = SimpleNet(8, 8)
adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters())
x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
for i in range(10):
out = net(x)
loss = fluid.layers.mean(out)
loss.backward()
adam.minimize(loss)
net.clear_gradients()
model_path = "simplenet.example.model.model_filename"
configs = fluid.dygraph.jit.SaveLoadConfig()
configs.model_filename = "__simplenet__"
# saving with configs.model_filename
fluid.dygraph.jit.save(
layer=net,
model_path=model_path,
input_spec=[x],
configs=configs)
# [result] the saved model directory contains:
# __simplenet__ __variables__ __variables.info__
# loading with configs.model_filename
infer_net = fluid.dygraph.jit.load(model_path, configs=configs)
x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
pred = infer_net(x)
"""
return self._model_filename
@model_filename.setter
def model_filename(self, filename):
if not isinstance(filename, six.string_types):
raise TypeError(
"The SaveLoadConfig.model_filename should be str, but received input's type is %s."
% type(filename))
if len(filename) == 0:
raise ValueError(
"The SaveLoadConfig.model_filename is empty string.")
self._model_filename = filename
@property
def params_filename(self):
"""
The name of the file to save all persistable variables in the target Layer.
Default file name is :code:`__variables__` .
Examples:
.. code-block:: python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph import declarative
class SimpleNet(fluid.dygraph.Layer):
def __init__(self, in_size, out_size):
super(SimpleNet, self).__init__()
self._linear = Linear(in_size, out_size)
@declarative
def forward(self, x):
y = self._linear(x)
z = self._linear(y)
return z
# enable dygraph mode
fluid.enable_dygraph()
# train model
net = SimpleNet(8, 8)
adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters())
x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
for i in range(10):
out = net(x)
loss = fluid.layers.mean(out)
loss.backward()
adam.minimize(loss)
net.clear_gradients()
model_path = "simplenet.example.model.params_filename"
configs = fluid.dygraph.jit.SaveLoadConfig()
configs.params_filename = "__params__"
# saving with configs.params_filename
fluid.dygraph.jit.save(
layer=net,
model_path=model_path,
input_spec=[x],
configs=configs)
# [result] the saved model directory contains:
# __model__ __params__ __variables.info__
# loading with configs.params_filename
infer_net = fluid.dygraph.jit.load(model_path, configs=configs)
x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
pred = infer_net(x)
"""
return self._params_filename
@params_filename.setter
def params_filename(self, filename):
if not isinstance(filename, six.string_types):
raise TypeError(
"The SaveLoadConfig.params_filename should be str, but received input's type is %s."
% type(filename))
if len(filename) == 0:
raise ValueError(
"The SaveLoadConfig.params_filename is empty string.")
self._params_filename = filename
# NOTE: [why not use params_filename=None to control saving params separately]
# The new save interface does not recommend saving parameters separately.
# Here, the concepts should be separated as clearly as possible.
# Setting params_filename=None should only mean that the saved file name
# is unset, without any other meaning. A new separate_params flag that
# controls whether files are saved separately makes the concept clearer.
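# For example (directory layouts taken from the docstring examples below):
#   separate_params=False: __model__  __variables__  __variables.info__
#   separate_params=True:  __model__  linear_0.b_0  linear_0.w_0  __variables.info__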
@property
def separate_params(self):
"""
Configure whether to save the Layer parameters as separate files.
(In order to be compatible with the behavior of :ref:`api_fluid_io_save_inference_model` )
If True, each parameter will be saved to a separate file, named after the parameter,
and the SaveLoadConfig.params_filename configuration will not take effect. Default False.
Examples:
.. code-block:: python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph import declarative
class SimpleNet(fluid.dygraph.Layer):
def __init__(self, in_size, out_size):
super(SimpleNet, self).__init__()
self._linear = Linear(in_size, out_size)
@declarative
def forward(self, x):
y = self._linear(x)
z = self._linear(y)
return z
# enable dygraph mode
fluid.enable_dygraph()
# train model
net = SimpleNet(8, 8)
adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters())
x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
for i in range(10):
out = net(x)
loss = fluid.layers.mean(out)
loss.backward()
adam.minimize(loss)
net.clear_gradients()
model_path = "simplenet.example.model.separate_params"
configs = fluid.dygraph.jit.SaveLoadConfig()
configs.separate_params = True
# saving with configs.separate_params
fluid.dygraph.jit.save(
layer=net,
model_path=model_path,
input_spec=[x],
configs=configs)
# [result] the saved model directory contains:
# linear_0.b_0 linear_0.w_0 __model__ __variables.info__
# loading with configs.separate_params
infer_net = fluid.dygraph.jit.load(model_path, configs=configs)
x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
pred = infer_net(x)
"""
return self._separate_params
@separate_params.setter
def separate_params(self, value):
if not isinstance(value, bool):
raise TypeError(
"The SaveLoadConfig.separate_params should be bool value, but received input's type is %s."
% type(value))
self._separate_params = value
@switch_to_static_graph
def save(layer, model_path, input_spec=None, configs=None):
"""
Saves the input declarative Layer as an :ref:`api_imperative_TranslatedLayer`
format model, which can be used for inference or fine-tuning after loading.
It saves the translated program and all related persistable
variables of the input declarative Layer to the given ``model_path``.
The default file name of the saved translated program is ``__model__``,
the default file name of the saved persistable variables is ``__variables__``,
and some additional variable description information is also saved to the file
``__variables.info__``; this additional information is used in fine-tuning.
The saved model can be loaded by the following APIs:
- :ref:`api_imperative_jit_load`
- :ref:`api_fluid_io_load_inference_model` (need to pass ``params_filename='__variables__'``)
- Other C++ inference APIs
Args:
layer (Layer): the Layer to be saved. The Layer should be decorated by `@declarative`.
model_path (str): the directory to save the model.
input_spec (list[Variable], optional): Describes the input of the saved model.
It is the example inputs that will be passed to saved TranslatedLayer's forward
function. If None, all input variables of the original Layer's forward function
would be the inputs of the saved model. Default None.
configs (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig` object
that specifies additional configuration options. Default None.
Returns:
None
Examples:
.. code-block:: python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph import declarative
BATCH_SIZE = 32
BATCH_NUM = 20
def random_batch_reader():
def _get_random_images_and_labels(image_shape, label_shape):
image = np.random.random(size=image_shape).astype('float32')
label = np.random.random(size=label_shape).astype('int64')
return image, label
def __reader__():
for _ in range(BATCH_NUM):
batch_image, batch_label = _get_random_images_and_labels(
[BATCH_SIZE, 784], [BATCH_SIZE, 1])
yield batch_image, batch_label
return __reader__
class LinearNet(fluid.dygraph.Layer):
def __init__(self, in_size, out_size):
super(LinearNet, self).__init__()
self._linear = Linear(in_size, out_size)
@declarative
def forward(self, x):
return self._linear(x)
# enable dygraph mode
fluid.enable_dygraph()
# create network
net = LinearNet(784, 1)
adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters())
# create data loader
train_loader = fluid.io.DataLoader.from_generator(capacity=5)
train_loader.set_batch_generator(random_batch_reader())
# train
for data in train_loader():
img, label = data
label.stop_gradient = True
cost = net(img)
loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
adam.minimize(avg_loss)
net.clear_gradients()
# save model
model_path = "linear.example.model"
fluid.dygraph.jit.save(
layer=net,
model_path=model_path,
input_spec=[img])
"""
def get_inout_spec(all_vars, target_vars, return_name=False):
valid_vars = [var for var in all_vars if isinstance(var, Variable)]
valid_var_dict = {}
for var in valid_vars:
valid_var_dict[var.name] = var
if target_vars:
for i, var in enumerate(target_vars):
# check target var whether exists
if var.name not in valid_var_dict:
raise RuntimeError(
"The variable to feed/fetch are not exist.")
target_vars[i] = valid_var_dict[var.name]
else:
target_vars = valid_vars
if return_name:
target_vars = [var.name for var in target_vars]
return target_vars
# 1. input check
prog_translator = ProgramTranslator()
if not prog_translator.enable:
raise RuntimeError(
"The paddle.imperative.jit.save doesn't work when setting ProgramTranslator.enable=False."
)
if not isinstance(layer, Layer):
raise TypeError(
"The input layer of paddle.imperative.jit.save should be 'Layer', but received layer type is %s."
% type(layer))
if configs is None:
configs = SaveLoadConfig()
if input_spec is not None:
if not isinstance(input_spec, list):
raise TypeError(
"The input input_spec should be 'list', but received input_spec's type is %s."
% type(input_spec))
for var in input_spec:
if not isinstance(var, core.VarBase):
raise TypeError(
"The element in input_spec list should be 'Variable', but received element's type is %s."
% type(var))
# 2. get program of declarative Layer.forward
prog_cache = prog_translator.get_program_cache()
# make dummy args & kwargs, to get expected FunctionSpec
layer_func = FunctionSpec(type(layer).forward, [layer], {})
concrete_program, _ = prog_cache.get_program(layer_func)
# 3. share parameters from Layer to scope & record var info
scope = core.Scope()
state_dict = layer.state_dict()
extra_var_info = dict()
for structured_name, param_or_buffer in state_dict.items():
# share to scope
param_or_buffer_tensor = scope.var(param_or_buffer.name).get_tensor()
src_tensor = param_or_buffer.value().get_tensor()
param_or_buffer_tensor._share_data_with(src_tensor)
# record var info
extra_info_dict = dict()
extra_info_dict['structured_name'] = structured_name
extra_info_dict['stop_gradient'] = param_or_buffer.stop_gradient
if isinstance(param_or_buffer, ParamBase):
extra_info_dict['trainable'] = param_or_buffer.trainable
extra_var_info[param_or_buffer.name] = extra_info_dict
# 4. build input & output spec
input_var_names = get_inout_spec(concrete_program.inputs, input_spec, True)
output_vars = get_inout_spec(concrete_program.outputs, configs.output_spec)
# 5. save inference model
from paddle.fluid.io import save_inference_model
# VARIABLE_FILENAME keeps naming style consistent with '__model__'
if configs.params_filename is None:
configs.params_filename = VARIABLE_FILENAME
with scope_guard(scope):
save_inference_model(
dirname=model_path,
feeded_var_names=input_var_names,
target_vars=output_vars,
executor=Executor(_current_expected_place()),
main_program=concrete_program.main_program.clone(),
model_filename=configs.model_filename,
params_filename=None
if configs.separate_params else configs.params_filename,
export_for_deployment=configs._export_for_deployment,
program_only=configs._program_only)
# NOTE: [ Save extra variable info ]
# save_inference_model will lose some important variable information, including:
#   - Variable name and correspondence (when saving variables as one file)
#   - Variable.stop_gradient information
#   - Which persistable variables are parameters and which are not
#   - Parameter.trainable information
#
# The lost information cannot be recovered when the model is loaded again,
# so if we want to perform fine-tuning after loading, we may need to
# configure redundant information to proceed.
#
# Due to compatibility issues, we cannot change the original storage structure,
# but we can save this information in `jit.save` without changing the original
# storage, to improve the user experience. So we save the extra information into
# the file `__variables.info__`
extra_var_info_path = os.path.join(model_path, EXTRA_VAR_INFO_FILENAME)
with open(extra_var_info_path, 'wb') as f:
pickle.dump(extra_var_info, f, protocol=2)
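# The counterpart load path reads this file back in `_load_persistable_vars`
# (io.py), roughly:
#
#   with open(extra_var_info_path, 'rb') as f:
#       extra_var_info = pickle.load(f) if six.PY2 else pickle.load(
#           f, encoding='latin1')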
@dygraph_only
def load(model_path, configs=None):
"""
:api_attr: imperative
Load model saved by :ref:`api_imperative_jit_save` or :ref:`api_fluid_io_save_inference_model`
as :ref:`api_imperative_TranslatedLayer`, then perform inference or fine-tune training.
.. note::
For some historical reasons, if you load a model saved by :ref:`api_fluid_io_save_inference_model`,
there will be the following limitations when using it for fine-tuning:
1. Imperative mode does not support LoDTensor. All original model's feed targets or parameters that depend on LoD are temporarily unavailable.
2. All saved model's feed targets need to be passed into TranslatedLayer's forward function.
3. The variable's ``stop_gradient`` information is lost and can not be recovered.
4. The parameter's ``trainable`` information is lost and can not be recovered.
Args:
model_path (str): The directory path where the model is saved.
configs (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig` object that specifies
additional configuration options. Default None.
Returns:
TranslatedLayer: A Layer object that can run the saved translated model.
Examples:
1. Load model saved by :ref:`api_imperative_jit_save`, then perform inference and fine-tune training.
.. code-block:: python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph import declarative
BATCH_SIZE = 32
BATCH_NUM = 20
def random_batch_reader():
def _get_random_images_and_labels(image_shape, label_shape):
image = np.random.random(size=image_shape).astype('float32')
label = np.random.random(size=label_shape).astype('int64')
return image, label
def __reader__():
for _ in range(BATCH_NUM):
batch_image, batch_label = _get_random_images_and_labels(
[BATCH_SIZE, 784], [BATCH_SIZE, 1])
yield batch_image, batch_label
return __reader__
class LinearNet(fluid.dygraph.Layer):
def __init__(self, in_size, out_size):
super(LinearNet, self).__init__()
self._linear = Linear(in_size, out_size)
@declarative
def forward(self, x):
return self._linear(x)
# enable dygraph mode
fluid.enable_dygraph()
# 1. train & save model.
# create network
net = LinearNet(784, 1)
adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters())
# create data loader
train_loader = fluid.io.DataLoader.from_generator(capacity=5)
train_loader.set_batch_generator(random_batch_reader())
# train
for data in train_loader():
img, label = data
label.stop_gradient = True
cost = net(img)
loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
adam.minimize(avg_loss)
net.clear_gradients()
model_path = "linear.example.model"
fluid.dygraph.jit.save(
layer=net,
model_path=model_path,
input_spec=[img])
# 2. load model & inference
# load model
infer_net = fluid.dygraph.jit.load(model_path)
# inference
x = fluid.dygraph.to_variable(np.random.random((1, 784)).astype('float32'))
pred = infer_net(x)
# 3. load model & fine-tune
# load model
train_net = fluid.dygraph.jit.load(model_path)
train_net.train()
adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=train_net.parameters())
# create data loader
train_loader = fluid.io.DataLoader.from_generator(capacity=5)
train_loader.set_batch_generator(random_batch_reader())
# fine-tune
for data in train_loader():
img, label = data
label.stop_gradient = True
cost = train_net(img)
loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
adam.minimize(avg_loss)
train_net.clear_gradients()
2. Load model saved by :ref:`api_fluid_io_save_inference_model`, then perform inference and fine-tune training.
.. code-block:: python
import numpy as np
import paddle.fluid as fluid
BATCH_SIZE = 32
BATCH_NUM = 20
def random_batch_reader():
def _get_random_images_and_labels(image_shape, label_shape):
image = np.random.random(size=image_shape).astype('float32')
label = np.random.random(size=label_shape).astype('int64')
return image, label
def __reader__():
for _ in range(BATCH_NUM):
batch_image, batch_label = _get_random_images_and_labels(
[BATCH_SIZE, 784], [BATCH_SIZE, 1])
yield batch_image, batch_label
return __reader__
img = fluid.data(name='img', shape=[None, 784], dtype='float32')
label = fluid.data(name='label', shape=[None, 1], dtype='int64')
pred = fluid.layers.fc(input=img, size=10, act='softmax')
loss = fluid.layers.cross_entropy(input=pred, label=label)
avg_loss = fluid.layers.mean(loss)
optimizer = fluid.optimizer.SGD(learning_rate=0.001)
optimizer.minimize(avg_loss)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
loader = fluid.io.DataLoader.from_generator(
feed_list=[img, label], capacity=5, iterable=True)
loader.set_batch_generator(random_batch_reader(), places=place)
# 1. train and save inference model
for data in loader():
exe.run(
fluid.default_main_program(),
feed=data,
fetch_list=[avg_loss])
model_path = "fc.example.model"
fluid.io.save_inference_model(
model_path, ["img"], [pred], exe)
# enable dygraph mode
fluid.enable_dygraph()
# 2. load model & inference
fc = fluid.dygraph.jit.load(model_path)
x = fluid.dygraph.to_variable(np.random.random((1, 784)).astype('float32'))
pred = fc(x)
# 3. load model & fine-tune
fc = fluid.dygraph.jit.load(model_path)
fc.train()
sgd = fluid.optimizer.SGD(learning_rate=0.001,
parameter_list=fc.parameters())
train_loader = fluid.io.DataLoader.from_generator(capacity=5)
train_loader.set_batch_generator(
random_batch_reader(), places=place)
for data in train_loader():
img, label = data
label.stop_gradient = True
cost = fc(img)
loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
sgd.minimize(avg_loss)
"""
return TranslatedLayer._construct(model_path, configs)
@dygraph_only
def _trace(layer,
inputs,
......
......@@ -14,550 +14,26 @@
from __future__ import print_function
import logging
import numpy as np
import os
import six
from paddle.fluid.dygraph.jit import SaveLoadConfig
from paddle.fluid.dygraph.io import TranslatedLayer
from . import layers
from .. import core
from .. import framework
from .. import backward
from ..layers import nn
from .base import switch_to_static_graph
from ... import compat as cpt
# DESIGN IDEA: Add a special operator that executes a static program inside the operator.
#
# Op's Inputs:
# - the input variable of the user feed
# - the necessary parameters of the network
# Op's Outputs:
# - the output variable of fetch
#
# This op receives a complete program desc, internally creates a scope
# and an executor, and executes the program. Key points:
#
# 1. Data Sharing:
# The VarBase of the dynamic graph is not in the scope, so before the op
# executes the program internally, create persistent variables with the
# same name as feed, parameters, and fetch in the scope, and share the
# LoDTensor of the op input.
#
# 2. Forward and Backward Separation:
# Because dynamic graph ops perform forward and backward separately,
# the forward program is used as the execution object of the forward op,
# and the backward program is used as the execution object of the grad op.
class StaticModelRunner(layers.Layer):
# NOTE: This class will be deprecated later.
# It is kept here because PaddleHub is already using this API.
class StaticModelRunner(object):
"""
A dynamic graph Layer for loading an inference program and related parameters,
and then performing fine-tune training or inference.
The loaded program and parameters are saved by `fluid.io.save_inference_model`.
.. note::
**1. Dynamic graph mode does not support LoDTensor.
All original static graph model's feed targets or parameters
that depend on LoD are temporarily unavailable.**
**2. All saved inference model's feed targets need to be given.**
**3. The ``stop_gradient`` information is lost and can not be recovered.**
**4. The parameter's ``trainable`` information is lost and can not be recovered.**
**5. Double gradient model is not supported now.**
**6. Now only supports loading models saved by `fluid.io.save_inference_model`.**
Args:
model_dir(str): The directory path where the model is saved.
model_filename(str, optional): The file name of the saved inference program.
If set to None, the default filename
:code:`__model__` is used.
The default value is None.
params_filename(str, optional): The file name in which all related
parameters were saved. If set to None,
parameters are saved in separate files.
The default value is None.
Returns:
Layer: A Layer object that can run the loaded program.
Examples:
.. code-block:: python
import numpy as np
import paddle.fluid as fluid
BATCH_SIZE = 32
BATCH_NUM = 20
SAVE_DIRNAME = "fc.inference.model"
def random_batch_reader():
def _get_random_images_and_labels(image_shape, label_shape):
image = np.random.random(size=image_shape).astype('float32')
label = np.random.random(size=label_shape).astype('int64')
return image, label
def __reader__():
for _ in range(BATCH_NUM):
batch_image, batch_label = _get_random_images_and_labels(
[BATCH_SIZE, 784], [BATCH_SIZE, 1])
yield batch_image, batch_label
return __reader__
def train_and_save_static_model(place):
img = fluid.data(name='img', shape=[None, 784], dtype='float32')
label = fluid.data(name='label', shape=[None, 1], dtype='int64')
pred = fluid.layers.fc(input=img, size=10, act='softmax')
loss = fluid.layers.cross_entropy(input=pred, label=label)
avg_loss = fluid.layers.mean(loss)
optimizer = fluid.optimizer.SGD(learning_rate=0.001)
optimizer.minimize(avg_loss)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
loader = fluid.io.DataLoader.from_generator(
feed_list=[img, label], capacity=5, iterable=True)
loader.set_batch_generator(random_batch_reader(), places=place)
for data in loader():
exe.run(
fluid.default_main_program(),
feed=data,
fetch_list=[avg_loss])
# save model by fluid.io.save_inference_model
fluid.io.save_inference_model(
SAVE_DIRNAME, ["img"], [pred], exe)
# Step 1. train and save inference model in static graph mode
place = fluid.CPUPlace()
train_and_save_static_model(place)
# Step 2. load inference model in dygraph and fine-tune
with fluid.dygraph.guard(place):
fc = fluid.dygraph.static_runner.StaticModelRunner(SAVE_DIRNAME)
sgd = fluid.optimizer.SGD(learning_rate=0.001,
parameter_list=fc.parameters())
train_loader = fluid.io.DataLoader.from_generator(capacity=5)
train_loader.set_batch_generator(
random_batch_reader(), places=place)
for data in train_loader():
img = data[0]
label = data[1]
label.stop_gradient = True
cost = fc(inputs=img)
loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
sgd.minimize(avg_loss)
This is a temporary API that will be deprecated later; please use
`fluid.dygraph.jit.load` to achieve the same functionality.
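For example, Step 2 above can be migrated as follows (a sketch,
reusing the same SAVE_DIRNAME and place):
.. code-block:: python
with fluid.dygraph.guard(place):
fc = fluid.dygraph.jit.load(SAVE_DIRNAME)
sgd = fluid.optimizer.SGD(learning_rate=0.001,
parameter_list=fc.parameters())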
"""
def __init__(self, model_dir, model_filename=None, params_filename=None):
super(StaticModelRunner, self).__init__()
# Step 0. key variable definitions
# loaded inference program desc
self._infer_program_desc = None
# recovered train program desc
self._train_program_desc = None
# the program desc executed by StaticModelRunner,
# switched between infer and train by train() and eval()
self._trace_program_desc = None
self._inner_scope = core.Scope()
# the layer outputs var desc
self._output_descs = []
# input, output, params name list
self._input_names = []
self._output_names = []
self._param_names = []
# train or eval flag
self._is_test = False
# Step 1. load program desc from disk
# the saved model holds feed, fetch & scale ops, which are not needed and can be removed
self._infer_program_desc = self._load_static_model(model_dir,
model_filename)
# Step 2. load all parameters
self._load_persistable_dict(model_dir, params_filename)
# Step 3. generate backward program desc
self._train_program_desc = self._append_backward_desc()
# Step 4. recheck parameters stop gradients
self._recheck_stop_gradients()
# Step 5. set default mode to train
self.train()
def train(self):
self._is_test = False
self._trace_program_desc = self._train_program_desc
def eval(self):
self._is_test = True
self._trace_program_desc = self._infer_program_desc
def forward(self, *args):
"""
Executes the forward part of the StaticModelRunner Layer.
Generally, it is invoked directly by calling the Layer object.
Args:
args(tuple(np.ndarray|Variable)): the inputs of StaticModelRunner.
The order of input variables needs to be the same as the order
of feed variables when using `save_inference_model` to save model.
Returns:
Variable|list[Variable]: The forward outputs of StaticModelRunner Layer.
If there is only one output, return Variable;
if there are multiple outputs, return list[Variable].
"""
# Step 1. prepare inputs, outputs, attrs
input_vars = []
for i, value in enumerate(args):
if not isinstance(value, (np.ndarray, core.VarBase)):
raise TypeError(
"The type of inputs.value in StaticModelRunner.forward must be numpy array or Variable(VarBase), but received %s."
% type(value))
# NOTE: In order to unify the API, firstly convert the input to VarBase
if isinstance(value, np.ndarray):
var = core.VarBase(
value=value,
name=self._input_names[i],
persistable=False,
place=framework._current_expected_place(),
zero_copy=True)
else:
var = value
# TODO: the input may carry an important name set by the user
var.name = self._input_names[i]
input_vars.append(var)
params = []
for param in self._parameters.values():
params.append(param)
output_vars = []
for var_desc in self._output_descs:
var = core.VarBase(var_desc.dtype(),
var_desc.shape(),
var_desc.name(), var_desc.type(), False)
output_vars.append(var)
# hold forward variables
tmp_scope_vec = core.VarBase(core.VarDesc.VarType.FP32, [],
"program_out_scope",
core.VarDesc.VarType.STEP_SCOPES, True)
tmp_scope_vec.value().set_scope(self._inner_scope)
# Step 2. run the program by op
framework._dygraph_tracer().trace_op(
type='run_program',
inputs={'X': input_vars,
'Params': params},
outputs={'Out': output_vars,
'OutScope': tmp_scope_vec},
attrs={
'global_block': self._trace_program_desc.block(0),
'start_op_index': 0,
'end_op_index': self._infer_program_desc.block(0).op_size(),
'is_test': self._is_test
})
# NOTE: [ why we need to set the param's gradient type here ]
# if the user enables sparse gradient mode, the param's gradient
# will be SelectedRows, not LoDTensor. But the tracer will just
# set the param grad VarBase by the forward VarBase (LoDTensor).
# If we don't change the grad_var type here, RunProgramOp would
# need to forcibly transform SelectedRows to LoDTensor, which may
# not be the result the user wants.
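# e.g. (a hypothetical case) an embedding parameter trained with
# is_sparse=True: its @GRAD var is SelectedRows in the program desc,
# so the grad VarBase type must be switched before RunProgramOp runs.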
for param in params:
grad_name = param.name + core.grad_var_suffix()
grad_var = self._trace_program_desc.block(0).find_var(
cpt.to_bytes(grad_name))
# NOTE: failing to find the grad var desc may be fine, e.g. in batch_norm
if grad_var is None:
continue
param._set_grad_type(grad_var.type())
# Step 3. prepare outputs, keeping the same form as the inputs
outs = output_vars
if len(output_vars) == 1:
outs = output_vars[0]
return outs
def _load_static_model(self, model_dir, model_filename=None):
# Step 1. dir and filename check
load_dirname = os.path.normpath(model_dir)
if not os.path.isdir(load_dirname):
raise ValueError("There is no directory named '%s'" % load_dirname)
def __new__(cls, model_dir, model_filename=None, params_filename=None):
configs = SaveLoadConfig()
if model_filename is not None:
model_filename = os.path.basename(model_filename)
else:
model_filename = "__model__"
model_filename = os.path.join(load_dirname, model_filename)
# Step 2. parse program desc
with open(model_filename, "rb") as f:
program_desc_str = f.read()
program_desc = core.ProgramDesc(program_desc_str)
if not core._is_program_version_supported(program_desc._version()):
raise ValueError("Unsupported program version: %d\n" %
program_desc._version())
# Step 3.
# - remove feed, fetch and useless scale-1 op
# - remove op_callstack attr
ops_to_remove = []
root_block = program_desc.block(0)
for i in six.moves.range(root_block.op_size()):
op = root_block.op(i)
if op.type() == 'feed':
ops_to_remove.append(i)
feed_var_name = cpt.to_bytes(op.input('X')[0])
root_block._remove_var(feed_var_name)
self._input_names.append(cpt.to_bytes(op.output('Out')[0]))
elif op.type() == 'scale' and op.output('Out')[0].startswith(
'save_infer_model/scale_'):
ops_to_remove.append(i)
out_var_name = cpt.to_bytes(op.output('Out')[0])
root_block._remove_var(out_var_name)
self._output_names.append(cpt.to_bytes(op.input('X')[0]))
self._output_descs.append(
root_block.find_var(cpt.to_bytes(op.input('X')[0])))
elif op.type() == 'fetch':
ops_to_remove.append(i)
fetch_var_name = cpt.to_bytes(op.output('Out')[0])
root_block._remove_var(fetch_var_name)
# NOTE: some old pre-trained models have no extra scale_op
if not op.input('X')[0].startswith('save_infer_model/scale_'):
self._output_names.append(cpt.to_bytes(op.input('X')[0]))
self._output_descs.append(
root_block.find_var(cpt.to_bytes(op.input('X')[0])))
else:
if op.has_attr("op_callstack"):
op.remove_attr("op_callstack")
for op_idx in reversed(ops_to_remove):
root_block._remove_op(op_idx, op_idx + 1)
# NOTE: reverse feed vars
self._input_names.reverse()
# Step 4. add scale for outputs
tmp_program = self._build_program_by_desc(program_desc)
self._append_scale_to_output(tmp_program)
return program_desc
@switch_to_static_graph
def _append_scale_to_output(self, program):
# 1. append scale & save var
scale_output_vars = []
with framework.program_guard(program):
for i, out in enumerate(self._output_descs):
var = program.global_block().var(out.name())
var = nn.scale(
var, 1., name="static_model_runner/scale_{}".format(i))
scale_output_vars.append(var)
# 2. update output names & descs
for i, var in enumerate(scale_output_vars):
self._output_names[i] = var.name
self._output_descs[i] = var.desc
@switch_to_static_graph
def _append_backward_desc(self):
assert self._infer_program_desc is not None, "The StaticModelRunner is not initialized properly."
program_desc_copy = core.ProgramDesc(self._infer_program_desc)
# Step 1. set all `is_test` attributes to False
self._change_is_test_status(program_desc_copy, False)
# Step 2. prepare program and related var
# NOTE: To reuse backward interfaces, build Program firstly.
# Originally, there is no need to build a Program, but we would have
# to rewrite almost the entire series of append_backward methods for
# program_desc. Therefore, to reuse the methods of backward.py, the
# Program is built here.
fwd_op_num = program_desc_copy.block(0).op_size()
program = self._build_program_by_desc(program_desc_copy)
# TODO: could the targets be in a sub-block?
targets = []
for out in self._output_descs:
targets.append(program.global_block().var(out.name()))
# Step 3. append backward
backward.gradients(targets=targets, inputs=[])
return program.desc
def _load_persistable_dict(self, model_dir, params_filename=None):
load_dirname = os.path.normpath(model_dir)
assert self._infer_program_desc is not None, "The StaticModelRunner is not initialized properly."
persis_vars = self._get_persis_vars(self._infer_program_desc)
load_var_map = {}
for each_var in persis_vars:
orig_each_name = each_var.name()
# append suffix
self._append_loaded_suffix_to_param(each_var)
# create output varbase
new_var = framework.ParamBase(
shape=each_var.shape(),
dtype=each_var.dtype(),
name=each_var.name(),
type=each_var.type(),
persistable=True)
if params_filename is None:
if not self._is_parameter(each_var):
continue
framework._dygraph_tracer().trace_op(
type='load',
inputs={},
outputs={'Out': new_var},
attrs={
'file_path': os.path.join(load_dirname, orig_each_name)
})
new_var.stop_gradient = False
self.add_parameter(name=new_var.name, parameter=new_var)
self._param_names.append(new_var.name)
else:
load_var_map[each_var.name()] = new_var
configs.model_filename = model_filename
if params_filename is not None:
load_var_list = []
for name in sorted(load_var_map.keys()):
load_var_list.append(load_var_map[name])
framework._dygraph_tracer().trace_op(
type='load_combine',
inputs={},
outputs={'Out': load_var_list},
attrs={
'file_path': os.path.join(load_dirname, params_filename)
})
for each_var in persis_vars:
if not self._is_parameter(each_var):
continue
param = load_var_map[each_var.name()]
param.stop_gradient = False
self.add_parameter(name=param.name, parameter=param)
self._param_names.append(param.name)
def _recheck_stop_gradients(self):
assert self._train_program_desc is not None, "The StaticModelRunner is not initialized properly."
# NOTE: After loading the model, the stop_gradient information
# of the original variables is lost, but if a parameter does not
# have a corresponding @GRAD variable in the backward program,
# it can be treated as stop_gradient as well
all_var_names = self._get_all_var_names(self._train_program_desc)
for param_name in self._parameters:
param_grad_name = param_name + core.grad_var_suffix()
if param_grad_name not in all_var_names:
self._parameters[param_name].stop_gradient = True
def _get_all_var_names(self, program_desc):
all_var_names = set()
for i in six.moves.range(program_desc.num_blocks()):
block = program_desc.block(i)
for var in block.all_vars():
all_var_names.add(var.name())
return all_var_names
def _get_persis_vars(self, program_desc):
persis_vars = []
for i in six.moves.range(program_desc.num_blocks()):
block = program_desc.block(i)
persis_vars.extend(
list(filter(self._is_persistable, block.all_vars())))
return persis_vars
@switch_to_static_graph
def _build_program_by_desc(self, program_desc):
prog = framework.Program()
prog.desc = program_desc
prog.blocks = [
framework.Block(prog, i)
for i in six.moves.range(prog.desc.num_blocks())
]
prog._sync_with_cpp()
return prog
def _is_persistable(self, var_desc):
if var_desc.type() == core.VarDesc.VarType.FEED_MINIBATCH or \
var_desc.type() == core.VarDesc.VarType.FETCH_LIST or \
var_desc.type() == core.VarDesc.VarType.READER or \
var_desc.type() == core.VarDesc.VarType.RAW:
return False
return var_desc.persistable()
def _is_parameter(self, persis_var_desc):
assert self._infer_program_desc is not None, "The StaticModelRunner is not initialized properly."
# 1. firstly, the param should be the input of some op
input_ops = [] # op can be repeated
for block_idx in six.moves.range(self._infer_program_desc.num_blocks()):
block = self._infer_program_desc.block(block_idx)
for op_idx in six.moves.range(block.op_size()):
op = block.op(op_idx)
# NOTE: parameter is the input of a certain op
if persis_var_desc.name() in op.input_arg_names():
input_ops.append(op)
# 2. secondly, the param should not be the output of any op, unless that op also takes it as input
for block_idx in six.moves.range(self._infer_program_desc.num_blocks()):
block = self._infer_program_desc.block(block_idx)
for op_idx in six.moves.range(block.op_size()):
op = block.op(op_idx)
if persis_var_desc.name() in op.output_arg_names():
# such as batch_norm_op
if op in input_ops:
continue
else:
return False
return True
def _change_is_test_status(self, program_desc, is_test):
# change all `is_test` attributes
for i in six.moves.range(program_desc.num_blocks()):
block = program_desc.block(i)
for j in six.moves.range(block.op_size()):
op = block.op(j)
if op.has_attr('is_test'):
op._set_attr('is_test', is_test)
def _append_loaded_suffix(self, name):
"""
Append the loaded suffix to the given variable name,
e.g. x ==> x@LOADED
"""
suffix = core.loaded_var_suffix()
name = cpt.to_text(name)
if suffix not in name:
name = name + suffix
return name
def _append_loaded_suffix_to_param(self, param_desc):
old_name = param_desc.name()
new_name = self._append_loaded_suffix(param_desc.name())
param_desc.set_name(new_name)
for block_idx in six.moves.range(self._infer_program_desc.num_blocks()):
block = self._infer_program_desc.block(block_idx)
for op_idx in six.moves.range(block.op_size()):
op = block.op(op_idx)
op._rename_input(old_name, new_name)
op._rename_output(old_name, new_name)
configs.params_filename = params_filename
return TranslatedLayer._construct(model_dir, configs)
......@@ -26,6 +26,7 @@ from paddle.fluid.dygraph import to_variable
from paddle.fluid.dygraph.nn import Conv2D, Linear, Pool2D
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.dygraph.jit import declarative
from paddle.fluid.dygraph.io import VARIABLE_FILENAME
from paddle.fluid.dygraph.dygraph_to_static import ProgramTranslator
SEED = 2020
......@@ -201,6 +202,9 @@ class TestMNISTWithDeclarative(TestMNIST):
self.check_save_inference_model([dy_x_data, y_data],
prog_trans, to_static,
prediction)
# new save load check
self.check_jit_save_load(mnist, [dy_x_data], [img],
to_static, prediction)
break
return loss_data
......@@ -224,6 +228,45 @@ class TestMNISTWithDeclarative(TestMNIST):
return np.array(results[0])
def check_jit_save_load(self, model, inputs, input_spec, to_static, gt_out):
if to_static:
infer_model_path = "./test_mnist_inference_model_by_jit_save"
configs = fluid.dygraph.jit.SaveLoadConfig()
configs.output_spec = [gt_out]
fluid.dygraph.jit.save(
layer=model,
model_path=infer_model_path,
input_spec=input_spec,
configs=configs)
# load in static mode
static_infer_out = self.jit_load_and_run_inference_static(
infer_model_path, inputs)
self.assertTrue(np.allclose(gt_out.numpy(), static_infer_out))
# load in dygraph mode
dygraph_infer_out = self.jit_load_and_run_inference_dygraph(
infer_model_path, inputs)
self.assertTrue(np.allclose(gt_out.numpy(), dygraph_infer_out))
@switch_to_static_graph
def jit_load_and_run_inference_static(self, model_path, inputs):
exe = fluid.Executor(self.place)
[inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(
dirname=model_path,
executor=exe,
params_filename=VARIABLE_FILENAME)
assert len(inputs) == len(feed_target_names)
results = exe.run(inference_program,
feed=dict(zip(feed_target_names, inputs)),
fetch_list=fetch_targets)
return np.array(results[0])
def jit_load_and_run_inference_dygraph(self, model_path, inputs):
infer_net = fluid.dygraph.jit.load(model_path)
pred = infer_net(inputs[0])
return pred.numpy()
if __name__ == "__main__":
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph import declarative
BATCH_SIZE = 32
BATCH_NUM = 20
SEED = 10
def random_batch_reader():
def _get_random_images_and_labels(image_shape, label_shape):
np.random.seed(SEED)
image = np.random.random(size=image_shape).astype('float32')
label = np.random.random(size=label_shape).astype('int64')
return image, label
def __reader__():
for _ in range(BATCH_NUM):
batch_image, batch_label = _get_random_images_and_labels(
[BATCH_SIZE, 784], [BATCH_SIZE, 1])
yield batch_image, batch_label
return __reader__
class LinearNet(fluid.dygraph.Layer):
def __init__(self, in_size, out_size):
super(LinearNet, self).__init__()
self._linear = Linear(in_size, out_size)
@declarative
def forward(self, x):
return self._linear(x)
class LinearNetNotDeclarative(fluid.dygraph.Layer):
def __init__(self, in_size, out_size):
super(LinearNetNotDeclarative, self).__init__()
self._linear = Linear(in_size, out_size)
def forward(self, x):
return self._linear(x)
class LinearNetReturnLoss(fluid.dygraph.Layer):
def __init__(self, in_size, out_size):
super(LinearNetReturnLoss, self).__init__()
self._linear = Linear(in_size, out_size)
@declarative
def forward(self, x):
y = self._linear(x)
z = self._linear(y)
loss = fluid.layers.mean(z)
return z, loss
def train(layer):
# create optimizer
adam = fluid.optimizer.AdamOptimizer(
learning_rate=0.1, parameter_list=layer.parameters())
# create data loader
train_loader = fluid.io.DataLoader.from_generator(capacity=5)
train_loader.set_batch_generator(random_batch_reader())
# train
for data in train_loader():
img, label = data
label.stop_gradient = True
cost = layer(img)
loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
adam.minimize(avg_loss)
layer.clear_gradients()
return [img], layer, avg_loss
def infer(layer):
x = fluid.dygraph.to_variable(np.random.random((1, 784)).astype('float32'))
return layer(x)
class TestJitSaveLoad(unittest.TestCase):
def setUp(self):
self.model_path = "model.test_jit_save_load"
# enable dygraph mode
fluid.enable_dygraph()
# config seed
fluid.default_main_program().random_seed = SEED
def train_and_save_model(self):
layer = LinearNet(784, 1)
example_inputs, layer, _ = train(layer)
fluid.dygraph.jit.save(
layer=layer, model_path=self.model_path, input_spec=example_inputs)
return layer
def test_save(self):
# train and save model
self.train_and_save_model()
def test_load_inference(self):
# train and save model
train_layer = self.train_and_save_model()
# load model
infer_layer = fluid.dygraph.jit.load(self.model_path)
train_layer.eval()
# inference & compare
x = fluid.dygraph.to_variable(
np.random.random((1, 784)).astype('float32'))
self.assertTrue(
np.array_equal(train_layer(x).numpy(), infer_layer(x).numpy()))
def test_load_finetune(self):
# train and save model
train_layer = self.train_and_save_model()
# load model
load_train_layer = fluid.dygraph.jit.load(self.model_path)
load_train_layer.train()
# train & compare
_, _, train_loss = train(train_layer)
_, _, load_train_loss = train(load_train_layer)
self.assertTrue(
np.array_equal(train_loss.numpy(), load_train_loss.numpy()))
def test_save_get_program_failed(self):
layer = LinearNetNotDeclarative(784, 1)
example_inputs, layer, _ = train(layer)
with self.assertRaises(RuntimeError):
fluid.dygraph.jit.save(
layer=layer,
model_path=self.model_path,
input_spec=example_inputs)
class TestJitSaveLoadConfig(unittest.TestCase):
def setUp(self):
# enable dygraph mode
fluid.enable_dygraph()
# config seed
fluid.default_main_program().random_seed = SEED
def basic_save_load(self, layer, model_path, configs):
# 1. train & save
example_inputs, train_layer, _ = train(layer)
fluid.dygraph.jit.save(
layer=train_layer,
model_path=model_path,
input_spec=example_inputs,
configs=configs)
# 2. load
infer_layer = fluid.dygraph.jit.load(model_path, configs=configs)
train_layer.eval()
# 3. inference & compare
x = fluid.dygraph.to_variable(
np.random.random((1, 784)).astype('float32'))
self.assertTrue(
np.array_equal(train_layer(x).numpy(), infer_layer(x).numpy()))
def test_model_filename(self):
layer = LinearNet(784, 1)
model_path = "model.save_load_config.output_spec"
configs = fluid.dygraph.jit.SaveLoadConfig()
configs.model_filename = "__simplenet__"
self.basic_save_load(layer, model_path, configs)
def test_params_filename(self):
layer = LinearNet(784, 1)
model_path = "model.save_load_config.params_filename"
configs = fluid.dygraph.jit.SaveLoadConfig()
configs.params_filename = "__params__"
self.basic_save_load(layer, model_path, configs)
def test_separate_params(self):
layer = LinearNet(784, 1)
model_path = "model.save_load_config.separate_params"
configs = fluid.dygraph.jit.SaveLoadConfig()
configs.separate_params = True
self.basic_save_load(layer, model_path, configs)
def test_output_spec(self):
train_layer = LinearNetReturnLoss(8, 8)
adam = fluid.optimizer.AdamOptimizer(
learning_rate=0.1, parameter_list=train_layer.parameters())
x = fluid.dygraph.to_variable(
np.random.random((4, 8)).astype('float32'))
for i in range(10):
out, loss = train_layer(x)
loss.backward()
adam.minimize(loss)
train_layer.clear_gradients()
model_path = "model.save_load_config.output_spec"
configs = fluid.dygraph.jit.SaveLoadConfig()
configs.output_spec = [out]
fluid.dygraph.jit.save(
layer=train_layer,
model_path=model_path,
input_spec=[x],
configs=configs)
train_layer.eval()
infer_layer = fluid.dygraph.jit.load(model_path, configs=configs)
x = fluid.dygraph.to_variable(
np.random.random((4, 8)).astype('float32'))
self.assertTrue(
np.array_equal(train_layer(x)[0].numpy(), infer_layer(x).numpy()))
if __name__ == '__main__':
unittest.main()
......@@ -16,7 +16,7 @@
__all__ = [
'BackwardStrategy', 'enabled', 'grad', 'guard', 'LayerList', 'load', 'save',
'prepare_context', 'to_variable', 'TracedLayer', 'no_grad', 'ParallelEnv',
'ProgramTranslator', 'declarative', 'DataParallel'
'ProgramTranslator', 'declarative', 'DataParallel', 'TranslatedLayer', 'jit'
]
__all__ += [
......@@ -31,6 +31,7 @@ from ..fluid.dygraph.checkpoint import save_dygraph as save
from ..fluid.dygraph.parallel import prepare_context, ParallelEnv, DataParallel
from ..fluid.dygraph.jit import TracedLayer, declarative
from ..fluid.dygraph import ProgramTranslator
from . import jit
from ..fluid.dygraph.learning_rate_scheduler import NoamDecay, PiecewiseDecay, NaturalExpDecay, ExponentialDecay, \
InverseTimeDecay, PolynomialDecay, CosineDecay
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ...fluid.dygraph.jit import save, load, SaveLoadConfig
from ...fluid.dygraph.io import TranslatedLayer
__all__ = ['save', 'load', 'SaveLoadConfig']
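# A usage sketch of this new import path (hedged; `net` and
# `example_inputs` are placeholders supplied by the caller):
#
#     import paddle.imperative as imperative
#     configs = imperative.jit.SaveLoadConfig()
#     imperative.jit.save(layer=net, model_path="example.model",
#                         input_spec=example_inputs, configs=configs)
#     loaded = imperative.jit.load("example.model", configs=configs)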
......@@ -202,6 +202,7 @@ packages=['paddle',
'paddle.nn.initializer',
'paddle.metric',
'paddle.imperative',
'paddle.imperative.jit',
'paddle.tensor',
]
......