Commit 57f4170d authored by Yang Zhang

Refactor loss handling
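Replace the string-based `loss_functions` / `loss_weights` arguments of `Model.prepare()` with callable `Loss` objects. A `Loss` subclass owns the loss computation (`forward`) and the shape/dtype inference for its label variables (`infer_shape` / `infer_dtype`), which removes the `LOSS_DTYPE_MAP` special-casing and the duplicated loss loops in the static and dynamic graph adapters.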

Parent e595200f
@@ -21,7 +21,7 @@ from paddle import fluid
 from paddle.fluid.optimizer import MomentumOptimizer
 from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
-from model import Model, shape_hints
+from model import Model, shape_hints, CrossEntropy


 class SimpleImgConvPool(fluid.dygraph.Layer):
@@ -132,7 +132,7 @@ if __name__ == '__main__':
     sgd = MomentumOptimizer(learning_rate=1e-3, momentum=0.9,
                             parameter_list=model.parameters())
     # sgd = SGDOptimizer(learning_rate=1e-3)
-    model.prepare(sgd, 'cross_entropy')
+    model.prepare(sgd, CrossEntropy())
     for e in range(2):
         for idx, batch in enumerate(train_loader()):
...
@@ -27,11 +27,7 @@ from paddle.fluid.executor import global_scope
 from paddle.fluid.io import is_belong_to_optimizer
 from paddle.fluid.dygraph.base import to_variable

-__all__ = ['Model', 'shape_hints']
-
-LOSS_DTYPE_MAP = {
-    'cross_entropy': 'int64'
-}
+__all__ = ['shape_hints', 'Model', 'Loss', 'CrossEntropy']


 def to_list(value):
@@ -71,6 +67,45 @@ def shape_hints(**hints):
     return wrapper


+class Loss(object):
+    def __init__(self, average=True):
+        super(Loss, self).__init__()
+        self.average = average
+
+    def infer_shape(self, outputs):
+        return [o.shape for o in outputs]
+
+    def infer_dtype(self, outputs):
+        return [o.dtype for o in outputs]
+
+    def forward(self, outputs, labels):
+        raise NotImplementedError()
+
+    def __call__(self, outputs, labels):
+        labels = to_list(labels)
+        if in_dygraph_mode():
+            labels = [to_variable(l) for l in labels]
+        losses = to_list(self.forward(to_list(outputs), labels))
+        if not self.average:
+            return losses
+        return [fluid.layers.reduce_mean(l) for l in losses]
+
+
+class CrossEntropy(Loss):
+    def __init__(self):
+        super(CrossEntropy, self).__init__()
+
+    def infer_shape(self, outputs):
+        return [o.shape[:-1] + (1, ) for o in outputs]
+
+    def infer_dtype(self, outputs):
+        return ['int64' for _ in outputs]
+
+    def forward(self, outputs, labels):
+        return [fluid.layers.cross_entropy(o, l) for o, l in zip(
+            outputs, labels)]
+
+
 class StaticGraphAdapter(object):
     def __init__(self, model):
         super(StaticGraphAdapter, self).__init__()
@@ -103,13 +138,13 @@ class StaticGraphAdapter(object):
         self.model.mode = value

     def train(self, inputs, labels, device='CPU', device_ids=None):
-        assert self.model._optimizer and self.model._loss_functions, \
+        assert self.model._optimizer and self.model._loss_function, \
             "model not ready, please call `model.prepare()` first"
         self.mode = 'train'
         return self._run(inputs, labels, device, device_ids)

     def eval(self, inputs, labels, device='CPU', device_ids=None):
-        assert self.model._loss_functions, \
+        assert self.model._loss_function, \
             "model not ready, please call `model.prepare()` first"
         self.mode = 'eval'
         return self._run(inputs, labels, device, device_ids)
@@ -249,22 +284,10 @@ class StaticGraphAdapter(object):
         losses = []
         with fluid.program_guard(prog, self._startup_prog):
             outputs = to_list(self.model.forward(*inputs))
-            losses = []
-            label_vars = []
             if self.mode != 'test':
-                loss_weights = self.model._loss_weights
-                if loss_weights is None:
-                    loss_weights = [1. for _ in self.model._loss_functions]
-                for o, l, w in zip(outputs, self.model._loss_functions,
-                                   loss_weights):
-                    if l is None:
-                        continue
-                    label_var = self._infer_label_var(o, l)
-                    label_vars.append(label_var)
-                    loss_fn = getattr(fluid.layers, l)
-                    loss = loss_fn(o, label_var)
-                    losses.append(fluid.layers.reduce_mean(loss) * w)
+                label_vars = self._infer_label_vars(outputs)
                 self._label_vars[self.mode] = label_vars
+                losses = self.model._loss_function(outputs, label_vars)
             if self.mode == 'train':
                 self._loss_endpoint = fluid.layers.sum(losses)
                 self.model._optimizer.minimize(self._loss_endpoint)
@@ -288,18 +311,14 @@ class StaticGraphAdapter(object):
             input_vars.append(fluid.data(name, shape, ndarray.dtype))
         return input_vars

-    # TODO wrap loss in callable classes
-    # - same call signaure
-    # - infer_shape method? or same shape as y_pred (e.g., one hot)
-    # - split multiple dtype loss functions (e.g., soft label)
-    def _infer_label_var(self, output, loss):
-        name = output.name + '.label'
-        shape = output.shape
-        # XXX could get ugly very quickly
-        if loss == 'cross_entropy':
-            shape = shape[:-1] + (1, )
-        dtype = LOSS_DTYPE_MAP.get(loss, output.dtype)
-        return fluid.data(name, shape, dtype)
+    def _infer_label_vars(self, outputs):
+        shapes = self.model._loss_function.infer_shape(outputs)
+        dtypes = self.model._loss_function.infer_dtype(outputs)
+        label_vars = []
+        for idx, (shape, dtype) in enumerate(zip(shapes, dtypes)):
+            name = '__label{}'.format(idx)
+            label_vars.append(fluid.data(name, shape, dtype))
+        return label_vars

     def _compile_and_initialize(self, prog, device='CPU', device_ids=None):
         if device.lower() == 'cpu':
@@ -351,14 +370,14 @@ class DynamicGraphAdapter(object):

     # TODO multi device in dygraph mode not implemented at present time
     def train(self, inputs, labels, device='CPU', device_ids=None):
-        assert self.model._optimizer and self.model._loss_functions, \
+        assert self.model._optimizer and self.model._loss_function, \
             "model not ready, please call `model.prepare()` first"
         super(Model, self.model).train()
         self.mode = 'train'
         inputs = to_list(inputs)
         labels = to_list(labels)
         outputs = self.model.forward(*[to_variable(x) for x in inputs])
-        losses = self._loss(outputs, labels)
+        losses = self.model._loss_function(outputs, labels)
         final_loss = fluid.layers.sum(losses)
         final_loss.backward()
         self.model._optimizer.minimize(final_loss)
@@ -367,14 +386,14 @@ class DynamicGraphAdapter(object):
             [to_numpy(l) for l in losses]

     def eval(self, inputs, labels, device='CPU', device_ids=None):
-        assert self.model._loss_functions, \
+        assert self.model._loss_function, \
             "model not ready, please call `model.prepare()` first"
         super(Model, self.model).eval()
         self.mode = 'eval'
         inputs = to_list(inputs)
         labels = to_list(labels)
         outputs = self.model.forward(*[to_variable(x) for x in inputs])
-        losses = self._loss(outputs, labels)
+        losses = self.model._loss_function(outputs, labels)
         return [to_numpy(o) for o in to_list(outputs)], \
             [to_numpy(l) for l in losses]
@@ -404,26 +423,12 @@ class DynamicGraphAdapter(object):
             return
         self.model._optimizer.set_dict(optim)

-    def _loss(self, pred, labels):
-        losses = []
-        loss_weights = self.model._loss_weights
-        if loss_weights is None:
-            loss_weights = [1. for _ in self.model._loss_functions]
-        for o, l, w, t in zip(to_list(pred), self.model._loss_functions,
-                              loss_weights, labels):
-            if l is None:
-                continue
-            loss_fn = getattr(fluid.layers, l)
-            loss = loss_fn(o, to_variable(t))
-            losses.append(fluid.layers.reduce_mean(loss) * w)
-        return losses
-

 class Model(fluid.dygraph.Layer):
     def __init__(self):
         super(Model, self).__init__(self.__class__.__name__)
         self.mode = 'train'
-        self._loss_functions = []
+        self._loss_function = None
         self._loss_weights = None
         self._optimizer = None
         if in_dygraph_mode():
@@ -446,11 +451,11 @@ class Model(fluid.dygraph.Layer):
     def load(self, *args, **kwargs):
         return self._adapter.load(*args, **kwargs)

-    def prepare(self, optimizer, loss_functions, loss_weights=None):
+    def prepare(self, optimizer, loss_function):
         self._optimizer = optimizer
-        self._loss_functions = to_list(loss_functions)
-        if loss_weights is not None:
-            self._loss_weights = to_list(loss_weights)
+        assert isinstance(loss_function, Loss), \
+            "'loss_function' must be an instance of 'Loss'"
+        self._loss_function = loss_function

     def parameters(self, *args, **kwargs):
         return self._adapter.parameters(*args, **kwargs)
@@ -28,7 +28,7 @@ import paddle.fluid as fluid
 from paddle.fluid.layer_helper import LayerHelper
 from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear

-from model import Model
+from model import Model, CrossEntropy


 def center_crop_resize(img):
@@ -358,7 +358,7 @@ def main():
     with guard:
         model = ResNet()
         sgd = make_optimizer(parameter_list=model.parameters())
-        model.prepare(sgd, 'cross_entropy')
+        model.prepare(sgd, CrossEntropy())
         for e in range(epoch):
             print("======== train epoch {} ========".format(e))
...
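For reference, a minimal sketch of how the refactored API is used, pieced together from the hunks above. `MNIST` stands in for any `Model` subclass (such as the one in the first example) and `train_loader` for a batch iterator; `model.train(...)` is assumed to delegate to the adapters' `train()` shown above, and `MeanSquaredError` is an illustrative custom loss, not part of this commit.

from paddle import fluid
from paddle.fluid.optimizer import MomentumOptimizer

from model import Model, Loss, CrossEntropy


class MeanSquaredError(Loss):
    # Custom losses only need to override forward(); labels here share
    # the outputs' shape and dtype, so the base-class infer_shape() /
    # infer_dtype() defaults already describe the label variables.
    def forward(self, outputs, labels):
        return [fluid.layers.square_error_cost(o, l)
                for o, l in zip(outputs, labels)]


model = MNIST()  # hypothetical Model subclass
optim = MomentumOptimizer(learning_rate=1e-3, momentum=0.9,
                          parameter_list=model.parameters())

# Pass a Loss instance where a string name ('cross_entropy') used to go;
# prepare() now rejects anything that is not a Loss.
model.prepare(optim, CrossEntropy())
# model.prepare(optim, MeanSquaredError())  # a custom loss works the same way

for inputs, labels in train_loader():  # hypothetical loader
    # Assumed to delegate to the adapters' train() above, which returns
    # numpy outputs and per-loss values.
    outputs, losses = model.train(inputs, labels)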