Unverified commit 358f7852, authored by qingqing01, committed by GitHub

Merge pull request #3 from qingqing01/api_loss

Refine Loss in Model
- repo: https://github.com/PaddlePaddle/mirrors-yapf.git
  sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
  hooks:
  - id: yapf
    files: \.py$
- repo: https://github.com/pre-commit/pre-commit-hooks
  sha: a11d9314b22d8f8c7556443875b731ef05965464
  hooks:
  - id: check-merge-conflict
  - id: check-symlinks
  - id: detect-private-key
    files: (?!.*paddle)^.*$
  - id: end-of-file-fixer
    files: \.(md|yml)$
  - id: trailing-whitespace
    files: \.(md|yml)$
- repo: https://github.com/Lucas-C/pre-commit-hooks
  sha: v1.0.1
  hooks:
  - id: forbid-crlf
    files: \.(md|yml)$
  - id: remove-crlf
    files: \.(md|yml)$
  - id: forbid-tabs
    files: \.(md|yml)$
  - id: remove-tabs
    files: \.(md|yml)$
@@ -26,7 +26,7 @@ from paddle import fluid
 from paddle.fluid.optimizer import Momentum
 from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
-from model import Model, CrossEntropy
+from model import Model, CrossEntropy, Input

 class SimpleImgConvPool(fluid.dygraph.Layer):
@@ -78,7 +78,6 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
 class MNIST(Model):
     def __init__(self):
         super(MNIST, self).__init__()
         self._simple_img_conv_pool_1 = SimpleImgConvPool(
             1, 20, 5, 2, 2, act="relu")
@@ -88,12 +87,13 @@ class MNIST(Model):
         pool_2_shape = 50 * 4 * 4
         SIZE = 10
         scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5
-        self._fc = Linear(800,
-                          10,
-                          param_attr=fluid.param_attr.ParamAttr(
-                              initializer=fluid.initializer.NormalInitializer(
-                                  loc=0.0, scale=scale)),
-                          act="softmax")
+        self._fc = Linear(
+            800,
+            10,
+            param_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.NormalInitializer(
+                    loc=0.0, scale=scale)),
+            act="softmax")

     def forward(self, inputs):
         x = self._simple_img_conv_pool_1(inputs)
@@ -137,13 +137,15 @@ def main():
         paddle.batch(paddle.dataset.mnist.test(),
                      batch_size=FLAGS.batch_size, drop_last=True), 1, 1)

-    device_ids = list(range(FLAGS.num_devices))
     with guard:
         model = MNIST()
-        optim = Momentum(learning_rate=FLAGS.lr, momentum=.9,
-                         parameter_list=model.parameters())
-        model.prepare(optim, CrossEntropy())
+        optim = Momentum(
+            learning_rate=FLAGS.lr,
+            momentum=.9,
+            parameter_list=model.parameters())
+        inputs = [Input([None, 1, 28, 28], 'float32', name='image')]
+        labels = [Input([None, 1], 'int64', name='label')]
+        model.prepare(optim, CrossEntropy(), inputs, labels)
         if FLAGS.resume is not None:
             model.load(FLAGS.resume)
@@ -154,8 +156,7 @@ def main():
         val_acc = 0.0
         print("======== train epoch {} ========".format(e))
         for idx, batch in enumerate(train_loader()):
-            outputs, losses = model.train(batch[0], batch[1], device='gpu',
-                                          device_ids=device_ids)
+            outputs, losses = model.train(batch[0], batch[1])
             acc = accuracy(outputs[0], batch[1])[0]
             train_loss += np.sum(losses)
@@ -166,8 +167,7 @@ def main():
         print("======== eval epoch {} ========".format(e))
         for idx, batch in enumerate(val_loader()):
-            outputs, losses = model.eval(batch[0], batch[1], device='gpu',
-                                         device_ids=device_ids)
+            outputs, losses = model.eval(batch[0], batch[1])
             acc = accuracy(outputs[0], batch[1])[0]
             val_loss += np.sum(losses)
@@ -185,14 +185,21 @@ if __name__ == '__main__':
     parser.add_argument(
         "-e", "--epoch", default=100, type=int, help="number of epoch")
     parser.add_argument(
-        '--lr', '--learning-rate', default=1e-3, type=float, metavar='LR',
+        '--lr',
+        '--learning-rate',
+        default=1e-3,
+        type=float,
+        metavar='LR',
         help='initial learning rate')
     parser.add_argument(
         "-b", "--batch_size", default=128, type=int, help="batch size")
     parser.add_argument(
-        "-n", "--num_devices", default=4, type=int, help="number of devices")
+        "-n", "--num_devices", default=1, type=int, help="number of devices")
     parser.add_argument(
-        "-r", "--resume", default=None, type=str,
+        "-r",
+        "--resume",
+        default=None,
+        type=str,
         help="checkpoint path to resume")
     FLAGS = parser.parse_args()
     main()
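The training-script change above removes the per-call device and device_ids arguments from model.train() and model.eval(); under the refactored API, device placement is declared once in Model.prepare() (see the prepare() signature in the model.py diff below). A hedged sketch of how the old call style maps onto the new one; the shapes and flag names come from this script, everything else is illustrative:

# Old style: device placement was passed on every call.
#   outputs, losses = model.train(batch[0], batch[1], device='gpu',
#                                 device_ids=device_ids)
# New style: declare inputs/labels and device once, then pass only data.
inputs = [Input([None, 1, 28, 28], 'float32', name='image')]
labels = [Input([None, 1], 'int64', name='label')]
model.prepare(
    optim,
    CrossEntropy(),
    inputs,
    labels,
    device='GPU',  # or 'CPU'; None selects based on the installed package
    device_ids=list(range(FLAGS.num_devices)))
outputs, losses = model.train(batch[0], batch[1])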
@@ -27,10 +27,12 @@ from paddle.fluid.executor import global_scope
 from paddle.fluid.io import is_belong_to_optimizer
 from paddle.fluid.dygraph.base import to_variable

-__all__ = ['shape_hints', 'Model', 'Loss', 'CrossEntropy']
+__all__ = ['Model', 'Loss', 'CrossEntropy', 'Input']

 def to_list(value):
+    if value is None:
+        return value
     if isinstance(value, (list, tuple)):
         return value
     return [value]
@@ -51,20 +53,14 @@ def extract_args(func):
     return inspect.getargspec(func)[0]

-def shape_hints(**hints):
-    assert hints, "hints can not be empty"
-    assert all(isinstance(h, (list, tuple)) for h in hints.values()), \
-        "shape hint must be a list or tuple"
-
-    def wrapper(func):
-        args = extract_args(func)
-        invalid = set(hints.keys()) - set(args)
-        assert not invalid, \
-            "shape hint for arguments that are not present in forward method" \
-            + ": ({})".format(", ".join(invalid))
-        func.shape_hints = hints
-        return func
-    return wrapper
+class Input(fluid.dygraph.Layer):
+    def __init__(self, shape=None, dtype=None, name=None):
+        self.shape = shape
+        self.dtype = dtype
+        self.name = name
+
+    def forward(self):
+        return fluid.data(self.name, shape=self.shape, dtype=self.dtype)

 class Loss(object):
@@ -72,12 +68,6 @@ class Loss(object):
         super(Loss, self).__init__()
         self.average = average

-    def infer_shape(self, outputs):
-        return [o.shape for o in outputs]
-
-    def infer_dtype(self, outputs):
-        return [o.dtype for o in outputs]
-
     def forward(self, outputs, labels):
         raise NotImplementedError()
@@ -86,24 +76,21 @@ class Loss(object):
         if in_dygraph_mode():
             labels = [to_variable(l) for l in labels]
         losses = to_list(self.forward(to_list(outputs), labels))
-        if not self.average:
-            return losses
-        return [fluid.layers.reduce_mean(l) for l in losses]
+        if self.average:
+            losses = [fluid.layers.reduce_mean(l) for l in losses]
+        else:
+            losses = [fluid.layers.reduce_sum(l) for l in losses]
+        return losses

 class CrossEntropy(Loss):
-    def __init__(self):
+    def __init__(self, average=True):
         super(CrossEntropy, self).__init__()

-    def infer_shape(self, outputs):
-        return [o.shape[:-1] + (1, ) for o in outputs]
-
-    def infer_dtype(self, outputs):
-        return ['int64' for _ in outputs]
-
     def forward(self, outputs, labels):
-        return [fluid.layers.cross_entropy(o, l) for o, l in zip(
-            outputs, labels)]
+        return [
+            fluid.layers.cross_entropy(o, l) for o, l in zip(outputs, labels)
+        ]
 class StaticGraphAdapter(object):
@@ -116,21 +103,13 @@ class StaticGraphAdapter(object):
         self._orig_prog = fluid.default_main_program()

         self._label_vars = {}  # label variables
+        self._input_vars = {}  # label variables
         self._endpoints = {}
         self._loss_endpoint = None
         self._executor = None
         self._progs = {}
         self._compiled_progs = {}

-        self._lazy_load_optimizer = None
-
-        # parse shape hints
-        self._input_desc = OrderedDict([
-            (n, None) for n in extract_args(self.model.forward) if n != 'self'
-        ])
-        if hasattr(self.model.forward, 'shape_hints'):
-            self._input_desc.update(self.model.forward.shape_hints)
-
     @property
     def mode(self):
         return self.model.mode
@@ -139,21 +118,19 @@ class StaticGraphAdapter(object):
     def mode(self, value):
         self.model.mode = value

-    def train(self, inputs, labels, device='CPU', device_ids=None):
-        assert self.model._optimizer and self.model._loss_function, \
+    def train(self, inputs, labels=None):
+        assert self.model._optimizer, \
             "model not ready, please call `model.prepare()` first"
         self.mode = 'train'
-        return self._run(inputs, labels, device, device_ids)
+        return self._run(inputs, labels)

-    def eval(self, inputs, labels, device='CPU', device_ids=None):
-        assert self.model._loss_function, \
-            "model not ready, please call `model.prepare()` first"
+    def eval(self, inputs, labels=None):
         self.mode = 'eval'
-        return self._run(inputs, labels, device, device_ids)
+        return self._run(inputs, labels)

-    def test(self, inputs, device='CPU', device_ids=None):
+    def test(self, inputs):
         self.mode = 'test'
-        return self._run(inputs, None, device, device_ids)
+        return self._run(inputs, None)

     def parameters(self, *args, **kwargs):
         return None
@@ -162,8 +139,10 @@ class StaticGraphAdapter(object):
     def _save(state, path):
         if not state:
             return
-        state = {k: to_numpy(v) if isinstance(v, Variable) else v
-                 for k, v in state.items()}
+        state = {
+            k: to_numpy(v) if isinstance(v, Variable) else v
+            for k, v in state.items()
+        }
         with open(path, 'wb') as f:
             pickle.dump(state, f)
@@ -179,8 +158,10 @@ class StaticGraphAdapter(object):
             return
         # XXX `optimizer.state_dict()` only work in dygraph mode
         optim_path = path + ".pdopt"
-        optim = {p.name: p for p in filter(
-            is_belong_to_optimizer, prog.list_vars())}
+        optim = {
+            p.name: p
+            for p in filter(is_belong_to_optimizer, prog.list_vars())
+        }
         if not optim:
             return
@@ -219,19 +200,15 @@ class StaticGraphAdapter(object):
         if optim_state is None:
             return
-        if self._executor is not None:
-            self._load_optimizer(optim_state)
-        else:
-            self._lazy_load_optimizer = optim_state
+        self._load_optimizer(optim_state, executor)

-    def _load_optimizer(self, state):
+    def _load_optimizer(self, state, executor):
         prog = self._progs.get('train', None)
         optim = list(filter(is_belong_to_optimizer, prog.list_vars()))
         if not optim:
             return

-        fluid.core._create_loaded_parameter(
-            optim, global_scope(), self._executor._default_executor)
+        fluid.core._create_loaded_parameter(optim, global_scope(), executor)

         converted_state = dict(state)
         for var in optim:
@@ -261,16 +238,17 @@ class StaticGraphAdapter(object):
         opt_cls_name = self.model._optimizer.__class__.__name__
         opt_unq_name = None
         for name in self.model._optimizer._accumulators.keys():
-            accum_name = name if opt_name is None else name[
-                len(opt_name) + 1:]
+            accum_name = name if opt_name is None else name[len(
+                opt_name) + 1:]
             for param_name, state_var in self.model._optimizer._accumulators[
                     name].items():
                 if opt_unq_name is None:
                     # can not infer out the exact unique(opt_name),
                     # thus try to extract rather than generate
-                    for state_key in sorted(state.keys(),
-                                            key=lambda x: len(x),
-                                            reverse=True):
+                    for state_key in sorted(
+                            state.keys(),
+                            key=lambda x: len(x),
+                            reverse=True):
                         prefix = param_name + "_" + (
                             opt_cls_name if opt_name is None else
                             opt_name) + "_"
@@ -281,8 +259,8 @@ class StaticGraphAdapter(object):
                                 param_name + "_"):prefix_offset]
                         # TODO: assert
                         # assert opt_unq_name is None
                     # gen(param.name + "_" + gen(opt_name) + "_" + accum_name)
                     # always end with "_0" since the unique optimizer._name
                     dy_state_name = (param_name + "_" + opt_unq_name +
                                      "_" + accum_name + "_0")
                     converted_state[
@@ -307,21 +285,20 @@ class StaticGraphAdapter(object):
                 t.set(ndarray, place)

-    def _run(self, inputs, labels=None, device='CPU', device_ids=None):
+    def _run(self, inputs, labels=None):
+        compiled_prog = self._compiled_progs.get(self.mode, None)
+        assert compiled_prog, \
+            "Model is not ready, please call `model.prepare()` first"
+
         inputs = to_list(inputs)
         if labels is not None:
             labels = to_list(labels)
-        assert len(inputs) == len(self._input_desc), "number of inputs" \
-            + " does not match number of arguments of `forward` method"
+        assert len(inputs) == len(self._input_vars[self.mode]), \
+            "number of inputs" \
+            + " does not match number of arguments of `forward` method"

-        if self._progs.get(self.mode, None) is None:
-            self._make_program(self._infer_input_vars(inputs))
-
-        compiled_prog = self._compile_and_initialize(
-            self._progs[self.mode], device, device_ids)
-
         feed = {}
-        input_names = [name for name in self._input_desc.keys()]
+        input_names = [v.name for v in self._input_vars[self.mode]]
         for idx, n in enumerate(input_names):
             # train and test may take different arguments
             if inputs[idx] is not None:
@@ -333,79 +310,76 @@ class StaticGraphAdapter(object):
         endpoints = self._endpoints[self.mode]
         fetch_list = endpoints['output'] + endpoints['loss']
         num_output = len(endpoints['output'])
-        out = self._executor.run(
-            compiled_prog, feed=feed,
-            fetch_list=fetch_list)
+        out = self._executor.run(compiled_prog,
+                                 feed=feed,
+                                 fetch_list=fetch_list)
         if self.mode == 'test':
             return out[:num_output]
         else:
             return out[:num_output], out[num_output:]
-    def _make_program(self, inputs):
+    def prepare(self):
+        modes = ['train', 'eval', 'test']
+        for mode in modes:
+            self._make_program(mode)
+            self._compile_and_initialize(self._progs[mode], mode)
+
+    def _make_program(self, mode):
+        prog = self._progs.get(mode, None)
+        if prog is not None:
+            return
+
         prog = self._orig_prog.clone()
-        # change inputs to the same var in cloned program
-        inputs = fluid.layers.utils.map_structure(
-            lambda var: prog.global_block().var(var.name), inputs)
         # NOTE: When defining learning rate scheduling in static-graph, ops to
         # increase the global step var and calculate learning rate would be
         # prepended into _orig_prog. test program maked by `_orig_prog.clone`
         # also would include these ops. Thus must prune these ops in test
         # program, otherwise the global step would be changed in test.
-        if self.mode != 'train':
+        if mode != 'train':
             for op in list(prog.global_block().ops):
                 prog.global_block()._remove_op(0)
-        if self.mode == 'train' and self.model._optimizer._learning_rate_map:
+        if mode == 'train' and self.model._optimizer \
+                and self.model._optimizer._learning_rate_map:
             # HACK workaround learning rate map issue
             lr_var = self.model._optimizer._learning_rate_map[self._orig_prog]
             self.model._optimizer._learning_rate_map[prog] = lr_var
         losses = []
         with fluid.program_guard(prog, self._startup_prog):
+            if isinstance(self.model._inputs, dict):
+                ins = [self.model._inputs[n] \
+                    for n in extract_args(self.model.forward) if n != 'self']
+            else:
+                ins = self.model._inputs
+            lbls = self.model._labels if self.model._labels else []
+            inputs = [k.forward() for k in to_list(ins)]
+            labels = [k.forward() for k in to_list(lbls)]
             outputs = to_list(self.model.forward(*inputs))
-            if self.mode != 'test':
-                label_vars = self._infer_label_vars(outputs)
-                self._label_vars[self.mode] = label_vars
-                losses = self.model._loss_function(outputs, label_vars)
-                if self.mode == 'train':
+            if mode != 'test':
+                if self.model._loss_function:
+                    losses = self.model._loss_function(outputs, labels)
+                if mode == 'train' and self.model._optimizer:
                     self._loss_endpoint = fluid.layers.sum(losses)
                     self.model._optimizer.minimize(self._loss_endpoint)
-        if self.mode != 'train':  # clone again to put it in test mode
+        if mode != 'train':  # clone again to put it in test mode
             prog = prog.clone(for_test=True)
-        self._progs[self.mode] = prog
-        self._endpoints[self.mode] = {
-            "output": outputs,
-            "loss": losses
-        }

-    def _infer_input_vars(self, inputs):
-        input_vars = []
-        for idx, i in enumerate(inputs):
-            if i is None:  # train and test may take different arguments
-                input_vars.append(None)
-                continue
-            ndarray = np.array(i)
-            name = list(self._input_desc.keys())[idx]
-            shape = list(self._input_desc.values())[idx]
-            if shape is None:
-                shape = (None, ) + ndarray.shape[1:]
-            input_vars.append(fluid.data(name, shape, ndarray.dtype))
-        return input_vars
-
-    def _infer_label_vars(self, outputs):
-        shapes = self.model._loss_function.infer_shape(outputs)
-        dtypes = self.model._loss_function.infer_dtype(outputs)
-        label_vars = []
-        for idx, (shape, dtype) in enumerate(zip(shapes, dtypes)):
-            name = '__label{}'.format(idx)
-            label_vars.append(fluid.data(name, shape, dtype))
-        return label_vars
-
-    def _compile_and_initialize(self, prog, device='CPU', device_ids=None):
-        compiled_prog = self._compiled_progs.get(self.mode, None)
+        self._input_vars[mode] = inputs
+        self._label_vars[mode] = labels
+        self._progs[mode] = prog
+        self._endpoints[mode] = {"output": outputs, "loss": losses}
+
+    def _compile_and_initialize(self, prog, mode):
+        compiled_prog = self._compiled_progs.get(mode, None)
         if compiled_prog is not None:
             return compiled_prog

-        places = [device.lower() == 'gpu' and fluid.CUDAPlace(i)
-                  or fluid.CPUPlace() for i in device_ids]
+        device = self.model._device
+        device_ids = self.model._device_ids
+
+        if device.lower() == 'gpu':
+            places = fluid.cuda_places(device_ids)
+        else:
+            places = fluid.cpu_places(len(device_ids) if device_ids else None)

         # XXX *ALL WEIGHTS* should be initialized upon model construction
         # even if `forward()` may run different code path for different mode
@@ -423,31 +397,14 @@ class StaticGraphAdapter(object):
             startup_prog = self._startup_prog._prune(uninitialized)
             self._executor.run(startup_prog)

-        if self.mode == 'train' and self._lazy_load_optimizer:
-            self._load_optimizer(self._lazy_load_optimizer)
-            self._lazy_load_optimizer = None
-
         compiled_prog = fluid.CompiledProgram(prog)
-        if len(device_ids) > 1:
+        if len(places) > 1:
             loss_name = None
-            if self.mode == 'train' and self._loss_endpoint is not None:
+            if mode == 'train' and self._loss_endpoint is not None:
                 loss_name = self._loss_endpoint.name
-            share_vars_from = None
-            if self.mode == 'eval' and 'train' in self._compiled_progs:
-                share_vars_from = self._compiled_progs['train']
-            # HACK invalidate eval program if is compiled before train program
-            # quite hackish, OTOH, it is generally uncommon that the eval
-            # program will be run before the train program
-            if self.mode == 'train' and 'eval' in self._compiled_progs:
-                del self._compiled_progs['eval']
             compiled_prog = compiled_prog.with_data_parallel(
-                loss_name=loss_name, places=places,
-                share_vars_from=share_vars_from)
-        self._compiled_progs[self.mode] = compiled_prog
-        return compiled_prog
+                loss_name=loss_name, places=places)
+        self._compiled_progs[mode] = compiled_prog
 class DynamicGraphAdapter(object):
@@ -464,13 +421,14 @@ class DynamicGraphAdapter(object):
         self.model.mode = value

     # TODO multi device in dygraph mode not implemented at present time
-    def train(self, inputs, labels, device='CPU', device_ids=None):
-        assert self.model._optimizer and self.model._loss_function, \
+    def train(self, inputs, labels=None):
+        assert self.model._optimizer, \
             "model not ready, please call `model.prepare()` first"
         super(Model, self.model).train()
         self.mode = 'train'
         inputs = to_list(inputs)
-        labels = to_list(labels)
+        if labels is not None:
+            labels = to_list(labels)
         outputs = self.model.forward(*[to_variable(x) for x in inputs])
         losses = self.model._loss_function(outputs, labels)
         final_loss = fluid.layers.sum(losses)
@@ -480,19 +438,25 @@ class DynamicGraphAdapter(object):
         return [to_numpy(o) for o in to_list(outputs)], \
             [to_numpy(l) for l in losses]

-    def eval(self, inputs, labels, device='CPU', device_ids=None):
-        assert self.model._loss_function, \
-            "model not ready, please call `model.prepare()` first"
+    def eval(self, inputs, labels=None):
         super(Model, self.model).eval()
         self.mode = 'eval'
         inputs = to_list(inputs)
-        labels = to_list(labels)
+        if labels is not None:
+            labels = to_list(labels)
         outputs = self.model.forward(*[to_variable(x) for x in inputs])
-        losses = self.model._loss_function(outputs, labels)
+
+        if self.model._loss_function:
+            losses = self.model._loss_function(outputs, labels)
+        else:
+            losses = []
+
+        # To be consistent with static graph
+        # return empty loss if loss_function is None
         return [to_numpy(o) for o in to_list(outputs)], \
             [to_numpy(l) for l in losses]

-    def test(self, inputs, device='CPU', device_ids=None):
+    def test(self, inputs):
         super(Model, self.model).eval()
         self.mode = 'test'
         inputs = [to_variable(x) for x in to_list(inputs)]
@@ -528,9 +492,8 @@ class DynamicGraphAdapter(object):
         opt_cls_name = self.model._optimizer.__class__.__name__
         opt_name = opt_unq_name[:opt_unq_name.rfind("_")]  # remove suffix idx
         param_names = [param.name for param in self.model.parameters()]
-        for var_name, state_var in sorted(optim.items(),
-                                          key=lambda x: len(x[0]),
-                                          reverse=True):
+        for var_name, state_var in sorted(
+                optim.items(), key=lambda x: len(x[0]), reverse=True):
             if var_name in ["@LR_DECAY_COUNTER@", "global_step"]:
                 # NOTE: dygraph saved global_step is 1 larger than that in
                 # static-graph, since the time of global_step to increase is
@@ -564,11 +527,21 @@ class DynamicGraphAdapter(object):

 class Model(fluid.dygraph.Layer):
+    """
+    FIXME: add more comments and usage
+    """
+
     def __init__(self):
         super(Model, self).__init__(self.__class__.__name__)
         self.mode = 'train'
+        self._inputs = None
+        self._labels = None
         self._loss_function = None
         self._loss_weights = None
+        self._loss = None
+        self._optimizer = None
+        self._device = None
+        self._device_ids = None
         self._optimizer = None
         if in_dygraph_mode():
             self._adapter = DynamicGraphAdapter(self)
@@ -590,11 +563,65 @@ class Model(fluid.dygraph.Layer):
     def load(self, *args, **kwargs):
         return self._adapter.load(*args, **kwargs)

-    def prepare(self, optimizer, loss_function):
+    def prepare(self,
+                optimizer=None,
+                loss_function=None,
+                inputs=None,
+                labels=None,
+                device=None,
+                device_ids=None):
+        """
+        FIXME: add comments
+        Args:
+            optimizer (Optimizer|None): optimizer must be set in training
+                and should be an Optimizer instance. It can be None in eval
+                and test mode.
+            loss_function (Loss|None): loss function must be set in training
+                and should be a Loss instance. It can be None when there is
+                no loss.
+            inputs (Input|list|dict|None): inputs, entry points of the network,
+                could be an Input layer, a list of Input layers,
+                a dict (name: Input), or None. For static graph,
+                inputs must be set. For dynamic graph, it could be None.
+            labels (Input|list|None): labels, entry points of the network,
+                could be an Input layer, a list of Input layers, or None.
+                For static graph, if loss_function is set in Model.prepare(),
+                labels must be set as well. Otherwise, it could be None.
+            device (str|None): specify device type, 'CPU' or 'GPU'.
+                If None, the device is selected automatically according to
+                the installed package version.
+            device_ids (list[int]|None): specify device indices. If None,
+                the available devices are obtained from environment
+                variables when the model is executed: for GPU, the currently
+                available device IDs are read from FLAGS_selected_gpus or
+                CUDA_VISIBLE_DEVICES; for CPU, the number of usable CPUs is
+                read from CPU_NUM (for example, export CPU_NUM=4). If CPU_NUM
+                is not set, the executor adds it to the environment and sets
+                its value to 1. The default is None.
+        """
         self._optimizer = optimizer
-        assert isinstance(loss_function, Loss), \
-            "'loss_function' must be sub classes of 'Loss'"
+        if loss_function:
+            if not isinstance(loss_function, Loss):
+                raise TypeError(
+                    "'loss_function' must be sub classes of 'Loss'")
         self._loss_function = loss_function
+        if not in_dygraph_mode():
+            if not isinstance(inputs, (list, dict, Input)):
+                raise TypeError(
+                    "'inputs' must be list or dict in static graph mode")
+            if loss_function and not isinstance(labels, (list, Input)):
+                raise TypeError("'labels' must be list in static graph mode")
+
+        self._inputs = inputs
+        self._labels = labels
+        self._device = device
+        if device is None:
+            self._device = 'GPU' if fluid.is_compiled_with_cuda() else 'CPU'
+        self._device_ids = device_ids
+        if not in_dygraph_mode():
+            self._adapter.prepare()

     def parameters(self, *args, **kwargs):
         return self._adapter.parameters(*args, **kwargs)
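With the infer_shape/infer_dtype hooks removed from Loss, a custom loss only has to implement forward(outputs, labels); label placeholders are now declared explicitly via Input and handed to prepare(). A minimal sketch of a user-defined loss under the refactored API; the class MyL2Loss, the square_error_cost choice, and the placeholder shape are illustrative and not part of this PR:

from paddle import fluid
from model import Input, Loss  # Loss and Input as defined in this PR

class MyL2Loss(Loss):
    # Only forward() is overridden; Loss.__call__ applies reduce_mean
    # when average=True (the default) or reduce_sum otherwise.
    def forward(self, outputs, labels):
        # One loss tensor per (output, label) pair; reduction happens
        # in Loss.__call__.
        return [
            fluid.layers.square_error_cost(o, l)
            for o, l in zip(outputs, labels)
        ]

# The matching label placeholder is declared up front rather than being
# inferred from the loss (shape and dtype here are illustrative).
labels = [Input([None, 1], 'float32', name='target')]
# model.prepare(optim, MyL2Loss(), inputs, labels)  # same flow as mnist.py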