Commit 36da5e38 authored by qingqing01

Refine Loss in Model

* 1. Remove shape_hints and add an Input class for declaring feeds
* 2. Remove infer_shape and infer_dtype from Loss
* 3. Update the mnist example; see it, and the sketch below, for usage
Parent 71075698
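
In short: callers now declare input and label shapes up front with Input
objects and pass them to the Model constructor, instead of decorating
forward() with shape_hints. A minimal sketch of the new calling convention,
reusing the MNIST model and names from the example diff below:

    from paddle.fluid.optimizer import Momentum
    from model import Model, CrossEntropy, Input

    # Declare feed shapes/dtypes once; None marks the batch dimension.
    inputs = [Input([None, 1, 28, 28], 'float32', name='image')]
    labels = [Input([None, 1], 'int64', name='label')]

    model = MNIST(inputs, labels)  # MNIST is the Model subclass below
    optim = Momentum(learning_rate=0.001, momentum=.9,
                     parameter_list=model.parameters())
    model.prepare(optim, CrossEntropy())  # loss_function is now optional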
Changes to the mnist example:

@@ -26,7 +26,7 @@ from paddle import fluid
 from paddle.fluid.optimizer import Momentum
 from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
-from model import Model, CrossEntropy
+from model import Model, CrossEntropy, Input
 
 
 class SimpleImgConvPool(fluid.dygraph.Layer):
@@ -76,9 +76,8 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
 class MNIST(Model):
-    def __init__(self):
-        super(MNIST, self).__init__()
+    def __init__(self, inputs, targets=None):
+        super(MNIST, self).__init__(inputs, targets)
         self._simple_img_conv_pool_1 = SimpleImgConvPool(
             1, 20, 5, 2, 2, act="relu")
@@ -140,7 +139,9 @@ def main():
     device_ids = list(range(FLAGS.num_devices))
 
     with guard:
-        model = MNIST()
+        inputs = [Input([None, 1, 28, 28], 'float32', name='image')]
+        labels = [Input([None, 1], 'int64', name='label')]
+        model = MNIST(inputs, labels)
         optim = Momentum(learning_rate=FLAGS.lr, momentum=.9,
                          parameter_list=model.parameters())
         model.prepare(optim, CrossEntropy())
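
Continuing the sketch above: with the Input declarations in place, train() and
eval() consume plain numpy batches that are matched positionally to the
declared variables; labels is now optional, and eval() no longer asserts that
a loss_function was configured. A sketch of one step (the batch data is made
up):

    import numpy as np

    image = np.random.rand(64, 1, 28, 28).astype('float32')
    label = np.random.randint(0, 10, (64, 1)).astype('int64')

    outputs, losses = model.train(image, label)  # forward + backward + update
    outputs, losses = model.eval(image, label)   # forward + loss only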
Changes to the model module (imported as "model" in the example):

@@ -27,7 +27,17 @@ from paddle.fluid.executor import global_scope
 from paddle.fluid.io import is_belong_to_optimizer
 from paddle.fluid.dygraph.base import to_variable
 
-__all__ = ['shape_hints', 'Model', 'Loss', 'CrossEntropy']
+__all__ = ['Model', 'Loss', 'CrossEntropy', 'Input']
+
+
+class Input(fluid.dygraph.Layer):
+    def __init__(self, shape=None, dtype=None, name=None):
+        self.shape = shape
+        self.dtype = dtype
+        self.name = name
+
+    def forward(self):
+        return fluid.data(self.name, shape=self.shape, dtype=self.dtype)
 
 
 def to_list(value):
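
Input is a thin declarative wrapper: in static graph mode the adapter calls
forward() once per declared input to create the feed variables (see
StaticGraphAdapter._run below). Roughly:

    image = Input([None, 1, 28, 28], 'float32', name='image')
    var = image.forward()
    # equivalent to:
    # var = fluid.data('image', shape=[None, 1, 28, 28], dtype='float32')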
@@ -44,40 +54,11 @@ def to_numpy(var):
     return np.array(t)
 
 
-def extract_args(func):
-    if hasattr(inspect, 'getfullargspec'):
-        return inspect.getfullargspec(func)[0]
-    else:
-        return inspect.getargspec(func)[0]
-
-
-def shape_hints(**hints):
-    assert hints, "hints can not be empty"
-    assert all(isinstance(h, (list, tuple)) for h in hints.values()), \
-        "shape hint must be a list or tuple"
-
-    def wrapper(func):
-        args = extract_args(func)
-        invalid = set(hints.keys()) - set(args)
-        assert not invalid, \
-            "shape hint for arguments that are not present in forward method" \
-            + ": ({})".format(", ".join(invalid))
-        func.shape_hints = hints
-        return func
-
-    return wrapper
-
-
 class Loss(object):
     def __init__(self, average=True):
         super(Loss, self).__init__()
         self.average = average
 
-    def infer_shape(self, outputs):
-        return [o.shape for o in outputs]
-
-    def infer_dtype(self, outputs):
-        return [o.dtype for o in outputs]
-
     def forward(self, outputs, labels):
         raise NotImplementedError()
@@ -86,24 +67,21 @@ class Loss(object):
         if in_dygraph_mode():
             labels = [to_variable(l) for l in labels]
         losses = to_list(self.forward(to_list(outputs), labels))
-        if not self.average:
-            return losses
-        return [fluid.layers.reduce_mean(l) for l in losses]
+        if self.average:
+            losses = [fluid.layers.reduce_mean(l) for l in losses]
+        else:
+            losses = [fluid.layers.reduce_sum(l) for l in losses]
+        return losses
 
 
 class CrossEntropy(Loss):
-    def __init__(self):
-        super(CrossEntropy, self).__init__()
+    def __init__(self, average=True):
+        super(CrossEntropy, self).__init__(average)
 
-    def infer_shape(self, outputs):
-        return [o.shape[:-1] + (1, ) for o in outputs]
-
-    def infer_dtype(self, outputs):
-        return ['int64' for _ in outputs]
-
     def forward(self, outputs, labels):
-        return [fluid.layers.cross_entropy(o, l) for o, l in zip(
-            outputs, labels)]
+        return [
+            fluid.layers.cross_entropy(o, l) for o, l in zip(outputs, labels)
+        ]
 
 
 class StaticGraphAdapter(object):
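
With infer_shape() and infer_dtype() gone, a custom loss only has to implement
forward(); label shapes and dtypes now come from the Input declarations on the
Model rather than from the loss. A hypothetical L1 loss as a sketch (the name
and formula are illustrative, not part of this commit):

    class L1Loss(Loss):
        def __init__(self, average=True):
            super(L1Loss, self).__init__(average)

        def forward(self, outputs, labels):
            # one raw loss per (output, label) pair; Loss.__call__ applies
            # reduce_mean or reduce_sum depending on self.average
            return [
                fluid.layers.reduce_sum(fluid.layers.abs(o - l), dim=-1)
                for o, l in zip(outputs, labels)
            ]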
@@ -116,6 +94,7 @@ class StaticGraphAdapter(object):
         self._orig_prog = fluid.default_main_program()
 
         self._label_vars = {}  # label variables
+        self._input_vars = {}  # input variables
         self._endpoints = {}
         self._loss_endpoint = None
         self._executor = None
@@ -124,13 +103,6 @@ class StaticGraphAdapter(object):
         self._lazy_load_optimizer = None
 
-        # parse shape hints
-        self._input_desc = OrderedDict([
-            (n, None) for n in extract_args(self.model.forward) if n != 'self'
-        ])
-        if hasattr(self.model.forward, 'shape_hints'):
-            self._input_desc.update(self.model.forward.shape_hints)
-
     @property
     def mode(self):
         return self.model.mode
@@ -139,15 +111,13 @@ class StaticGraphAdapter(object):
     def mode(self, value):
         self.model.mode = value
 
-    def train(self, inputs, labels, device='CPU', device_ids=None):
-        assert self.model._optimizer and self.model._loss_function, \
+    def train(self, inputs, labels=None, device='CPU', device_ids=None):
+        assert self.model._optimizer, \
             "model not ready, please call `model.prepare()` first"
         self.mode = 'train'
         return self._run(inputs, labels, device, device_ids)
 
-    def eval(self, inputs, labels, device='CPU', device_ids=None):
-        assert self.model._loss_function, \
-            "model not ready, please call `model.prepare()` first"
+    def eval(self, inputs, labels=None, device='CPU', device_ids=None):
         self.mode = 'eval'
         return self._run(inputs, labels, device, device_ids)
@@ -162,8 +132,10 @@ class StaticGraphAdapter(object):
     def _save(state, path):
         if not state:
             return
-        state = {k: to_numpy(v) if isinstance(v, Variable) else v
-                 for k, v in state.items()}
+        state = {
+            k: to_numpy(v) if isinstance(v, Variable) else v
+            for k, v in state.items()
+        }
         with open(path, 'wb') as f:
             pickle.dump(state, f)
@@ -176,8 +148,10 @@ class StaticGraphAdapter(object):
             return
         # XXX `optimizer.state_dict()` only work in dygraph mode
         optim_path = path + ".pdopt"
-        optim = {p.name: p for p in filter(
-            is_belong_to_optimizer, prog.list_vars())}
+        optim = {
+            p.name: p
+            for p in filter(is_belong_to_optimizer, prog.list_vars())
+        }
         if not optim:
             return
         # HACK this is contrived, optimizer state is not the same for
@@ -231,8 +205,9 @@ class StaticGraphAdapter(object):
         if not optim:
             return
 
-        fluid.core._create_loaded_parameter(
-            optim, global_scope(), self._executor._default_executor)
+        fluid.core._create_loaded_parameter(optim,
+                                            global_scope(),
+                                            self._executor._default_executor)
 
         for var in optim:
             assert var.name in state, \
@@ -257,17 +232,22 @@ class StaticGraphAdapter(object):
         inputs = to_list(inputs)
         if labels is not None:
             labels = to_list(labels)
-        assert len(inputs) == len(self._input_desc), "number of inputs" \
-            + " does not match number of arguments of `forward` method"
+        assert len(inputs) == len(self.model._inputs), "number of inputs" \
+            + " does not match number of arguments of `forward` method"
 
         if self._progs.get(self.mode, None) is None:
-            self._make_program(self._infer_input_vars(inputs))
+            if self.model._inputs is None:
+                raise ValueError("The inputs of Model must not be None.")
+            self._input_vars = [
+                k.forward() for k in to_list(self.model._inputs)
+            ]
+            self._make_program(self._input_vars)
 
-        compiled_prog = self._compile_and_initialize(
-            self._progs[self.mode], device, device_ids)
+        compiled_prog = self._compile_and_initialize(self._progs[self.mode],
+                                                     device, device_ids)
 
         feed = {}
-        input_names = [name for name in self._input_desc.keys()]
+        input_names = [v.name for v in self._input_vars]
         for idx, n in enumerate(input_names):
             # train and test may take different arguments
             if inputs[idx] is not None:
@@ -277,10 +257,12 @@ class StaticGraphAdapter(object):
                 feed[v.name] = labels[idx]
 
         endpoints = self._endpoints[self.mode]
-        fetch_list = endpoints['output'] + endpoints['loss']
+        fetch_list = endpoints['output']
+        if 'loss' in endpoints:
+            fetch_list += endpoints['loss']
         num_output = len(endpoints['output'])
-        out = self._executor.run(
-            compiled_prog, feed=feed,
-            fetch_list=fetch_list)
+        out = self._executor.run(compiled_prog,
+                                 feed=feed,
+                                 fetch_list=fetch_list)
         if self.mode == 'test':
             return out[:num_output]
@@ -297,9 +279,7 @@ class StaticGraphAdapter(object):
         with fluid.program_guard(prog, self._startup_prog):
             outputs = to_list(self.model.forward(*inputs))
             if self.mode != 'test':
-                label_vars = self._infer_label_vars(outputs)
-                self._label_vars[self.mode] = label_vars
-                losses = self.model._loss_function(outputs, label_vars)
+                losses = self._get_loss(outputs)
                 if self.mode == 'train':
                     self._loss_endpoint = fluid.layers.sum(losses)
                     self.model._optimizer.minimize(self._loss_endpoint)
@@ -309,38 +289,35 @@ class StaticGraphAdapter(object):
         self._endpoints[self.mode] = {
             "output": outputs,
             "loss": losses
-        }
+        } if self.model._loss_function else {
+            'output': outputs
+        }
 
-    def _infer_input_vars(self, inputs):
-        input_vars = []
-        for idx, i in enumerate(inputs):
-            if i is None:  # train and test may take different arguments
-                input_vars.append(None)
-                continue
-            ndarray = np.array(i)
-            name = list(self._input_desc.keys())[idx]
-            shape = list(self._input_desc.values())[idx]
-            if shape is None:
-                shape = (None, ) + ndarray.shape[1:]
-            input_vars.append(fluid.data(name, shape, ndarray.dtype))
-        return input_vars
-
-    def _infer_label_vars(self, outputs):
-        shapes = self.model._loss_function.infer_shape(outputs)
-        dtypes = self.model._loss_function.infer_dtype(outputs)
-        label_vars = []
-        for idx, (shape, dtype) in enumerate(zip(shapes, dtypes)):
-            name = '__label{}'.format(idx)
-            label_vars.append(fluid.data(name, shape, dtype))
-        return label_vars
+    def _get_loss(self, outputs):
+        if self.model._loss_function and self.model._loss:
+            raise ValueError(
+                "Do not set loss by model.set_loss() and "
+                "loss_function in model.prepare() at the same time.")
+        if self.model._loss_function is not None:
+            if self.model._labels is None:
+                raise ValueError("The labels of Model must not be None.")
+            label_vars = [k.forward() for k in to_list(self.model._labels)]
+            self._label_vars[self.mode] = label_vars
+            losses = self.model._loss_function(outputs, label_vars)
+        else:
+            assert self.model._loss
+            losses = to_list(self.model._loss)
+        return losses
 
     def _compile_and_initialize(self, prog, device='CPU', device_ids=None):
         compiled_prog = self._compiled_progs.get(self.mode, None)
         if compiled_prog is not None:
             return compiled_prog
 
-        places = [device.lower() == 'gpu' and fluid.CUDAPlace(i)
-                  or fluid.CPUPlace() for i in device_ids]
+        places = [
+            device.lower() == 'gpu' and fluid.CUDAPlace(i) or fluid.CPUPlace()
+            for i in device_ids
+        ]
 
         # XXX *ALL WEIGHTS* should be initialized upon model construction
         # even if `forward()` may run different code path for different mode
@@ -378,7 +355,8 @@ class StaticGraphAdapter(object):
                 del self._compiled_progs['eval']
 
         compiled_prog = compiled_prog.with_data_parallel(
-            loss_name=loss_name, places=places,
+            loss_name=loss_name,
+            places=places,
             share_vars_from=share_vars_from)
 
         self._compiled_progs[self.mode] = compiled_prog
@@ -399,15 +377,16 @@ class DynamicGraphAdapter(object):
         self.model.mode = value
 
     # TODO multi device in dygraph mode not implemented at present time
-    def train(self, inputs, labels, device='CPU', device_ids=None):
-        assert self.model._optimizer and self.model._loss_function, \
+    def train(self, inputs, labels=None, device='CPU', device_ids=None):
+        assert self.model._optimizer, \
            "model not ready, please call `model.prepare()` first"
         super(Model, self.model).train()
         self.mode = 'train'
         inputs = to_list(inputs)
-        labels = to_list(labels)
+        if labels is not None:
+            labels = to_list(labels)
         outputs = self.model.forward(*[to_variable(x) for x in inputs])
-        losses = self.model._loss_function(outputs, labels)
+        losses = self._get_loss(outputs, labels)
         final_loss = fluid.layers.sum(losses)
         final_loss.backward()
         self.model._optimizer.minimize(final_loss)
@@ -415,15 +394,14 @@ class DynamicGraphAdapter(object):
         return [to_numpy(o) for o in to_list(outputs)], \
             [to_numpy(l) for l in losses]
 
-    def eval(self, inputs, labels, device='CPU', device_ids=None):
-        assert self.model._loss_function, \
-            "model not ready, please call `model.prepare()` first"
+    def eval(self, inputs, labels=None, device='CPU', device_ids=None):
         super(Model, self.model).eval()
         self.mode = 'eval'
         inputs = to_list(inputs)
-        labels = to_list(labels)
+        if labels is not None:
+            labels = to_list(labels)
         outputs = self.model.forward(*[to_variable(x) for x in inputs])
-        losses = self.model._loss_function(outputs, labels)
+        losses = self._get_loss(outputs, labels)
         return [to_numpy(o) for o in to_list(outputs)], \
             [to_numpy(l) for l in losses]
@@ -434,6 +412,16 @@ class DynamicGraphAdapter(object):
             outputs = self.model.forward(*inputs)
         return [to_numpy(o) for o in to_list(outputs)]
 
+    def _get_loss(self, outputs, labels):
+        if self.model._loss_function and self.model._loss:
+            raise ValueError(
+                "Do not set loss by model.set_loss() and "
+                "loss_function in model.prepare() at the same time.")
+        if self.model._loss_function is not None:
+            return self.model._loss_function(outputs, labels)
+        else:
+            return to_list(self.model._loss)
+
     def parameters(self, *args, **kwargs):
         return super(Model, self.model).parameters(*args, **kwargs)
@@ -455,11 +443,14 @@ class DynamicGraphAdapter(object):
 class Model(fluid.dygraph.Layer):
-    def __init__(self):
+    def __init__(self, inputs=None, labels=None):
         super(Model, self).__init__(self.__class__.__name__)
         self.mode = 'train'
+        self._inputs = inputs
+        self._labels = labels
         self._loss_function = None
         self._loss_weights = None
+        self._loss = None
         self._optimizer = None
         if in_dygraph_mode():
             self._adapter = DynamicGraphAdapter(self)
@@ -481,11 +472,22 @@ class Model(fluid.dygraph.Layer):
     def load(self, *args, **kwargs):
         return self._adapter.load(*args, **kwargs)
 
-    def prepare(self, optimizer, loss_function):
+    def prepare(self, optimizer, loss_function=None):
         self._optimizer = optimizer
-        assert isinstance(loss_function, Loss), \
-            "'loss_function' must be sub classes of 'Loss'"
-        self._loss_function = loss_function
+        if loss_function:
+            if not isinstance(loss_function, Loss):
+                raise TypeError(
+                    "'loss_function' must be a subclass of 'Loss'")
+            self._loss_function = loss_function
 
     def parameters(self, *args, **kwargs):
         return self._adapter.parameters(*args, **kwargs)
 
+    def set_loss(self, loss):
+        if loss and self._loss_function:
+            raise ValueError(
+                "Do not set loss by model.set_loss() and "
+                "loss_function in model.prepare() at the same time.")
+        if not isinstance(loss, (Variable, fluid.core.VarBase)):
+            raise TypeError("loss type should be a Variable or VarBase.")
+        self._loss = loss
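
set_loss() is the counterpart to passing loss_function into prepare(): a model
that computes its own loss inside forward() registers the resulting variable,
and the adapters' _get_loss() then uses it instead of calling a Loss object.
The two mechanisms are mutually exclusive. A hypothetical sketch (the model,
layer sizes, and loss formula are illustrative, not part of this commit):

    from paddle import fluid
    from paddle.fluid.dygraph.nn import Linear
    from model import Model, Input

    class RegressionModel(Model):
        def __init__(self, inputs, labels=None):
            super(RegressionModel, self).__init__(inputs, labels)
            self.fc = Linear(10, 1)

        def forward(self, x, y):
            pred = self.fc(x)
            # register the loss variable directly; do not also pass a
            # loss_function to prepare(), or _get_loss() raises ValueError
            self.set_loss(
                fluid.layers.reduce_mean(fluid.layers.square(pred - y)))
            return pred

    # here the label is declared as a forward() input, since no Loss object
    # will consume it
    inputs = [Input([None, 10], 'float32', name='x'),
              Input([None, 1], 'float32', name='y')]
    model = RegressionModel(inputs)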