Unverified · Commit 358f7852, authored by qingqing01, committed by GitHub

Merge pull request #3 from qingqing01/api_loss

Refine Loss in Model
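
This change replaces the `shape_hints` decorator with an explicit `Input` layer for declaring network inputs and labels, moves device configuration from `train()`/`eval()`/`test()` into `Model.prepare()`, and simplifies the `Loss` classes (the `infer_shape`/`infer_dtype` hooks are no longer needed). The diff below touches the pre-commit configuration, the MNIST example script, and `model.py`.

A minimal sketch (not part of the diff) of the new dygraph-mode calling convention; it assumes this PR's `model.py` is importable and paddle.fluid is installed, and `SoftmaxNet` is a hypothetical toy model:

```python
import numpy as np
from paddle import fluid
from paddle.fluid.dygraph.nn import Linear
from paddle.fluid.optimizer import Momentum
from model import Model, CrossEntropy, Input


class SoftmaxNet(Model):  # hypothetical toy model, for illustration only
    def __init__(self):
        super(SoftmaxNet, self).__init__()
        self._fc = Linear(784, 10, act='softmax')

    def forward(self, x):
        return self._fc(x)


with fluid.dygraph.guard():
    model = SoftmaxNet()
    optim = Momentum(learning_rate=1e-3, momentum=0.9,
                     parameter_list=model.parameters())
    # Inputs and labels are now declared explicitly as Input layers instead
    # of being inferred from shape hints on forward(); device selection is
    # configured once in prepare() rather than on every train()/eval() call.
    inputs = [Input([None, 784], 'float32', name='image')]
    labels = [Input([None, 1], 'int64', name='label')]
    model.prepare(optim, CrossEntropy(), inputs, labels)

    x = np.random.random((4, 784)).astype('float32')
    y = np.random.randint(0, 10, (4, 1)).astype('int64')
    # train()/eval() now only take data; they return numpy outputs and losses.
    outputs, losses = model.train(x, y)
```

The MNIST example in the diff is updated to this calling convention.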
- repo: https://github.com/PaddlePaddle/mirrors-yapf.git
  sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
  hooks:
  - id: yapf
    files: \.py$
- repo: https://github.com/pre-commit/pre-commit-hooks
  sha: a11d9314b22d8f8c7556443875b731ef05965464
  hooks:
  - id: check-merge-conflict
  - id: check-symlinks
  - id: detect-private-key
    files: (?!.*paddle)^.*$
  - id: end-of-file-fixer
    files: \.(md|yml)$
  - id: trailing-whitespace
    files: \.(md|yml)$
- repo: https://github.com/Lucas-C/pre-commit-hooks
  sha: v1.0.1
  hooks:
  - id: forbid-crlf
    files: \.(md|yml)$
  - id: remove-crlf
    files: \.(md|yml)$
  - id: forbid-tabs
    files: \.(md|yml)$
  - id: remove-tabs
    files: \.(md|yml)$
@@ -26,7 +26,7 @@ from paddle import fluid
from paddle.fluid.optimizer import Momentum
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
from model import Model, CrossEntropy
from model import Model, CrossEntropy, Input
class SimpleImgConvPool(fluid.dygraph.Layer):
@@ -78,7 +78,6 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
class MNIST(Model):
def __init__(self):
super(MNIST, self).__init__()
self._simple_img_conv_pool_1 = SimpleImgConvPool(
1, 20, 5, 2, 2, act="relu")
@@ -88,7 +87,8 @@ class MNIST(Model):
pool_2_shape = 50 * 4 * 4
SIZE = 10
scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5
self._fc = Linear(800,
self._fc = Linear(
800,
10,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
@@ -137,13 +137,15 @@ def main():
paddle.batch(paddle.dataset.mnist.test(),
batch_size=FLAGS.batch_size, drop_last=True), 1, 1)
device_ids = list(range(FLAGS.num_devices))
with guard:
model = MNIST()
optim = Momentum(learning_rate=FLAGS.lr, momentum=.9,
optim = Momentum(
learning_rate=FLAGS.lr,
momentum=.9,
parameter_list=model.parameters())
model.prepare(optim, CrossEntropy())
inputs = [Input([None, 1, 28, 28], 'float32', name='image')]
labels = [Input([None, 1], 'int64', name='label')]
model.prepare(optim, CrossEntropy(), inputs, labels)
if FLAGS.resume is not None:
model.load(FLAGS.resume)
@@ -154,8 +156,7 @@ def main():
val_acc = 0.0
print("======== train epoch {} ========".format(e))
for idx, batch in enumerate(train_loader()):
outputs, losses = model.train(batch[0], batch[1], device='gpu',
device_ids=device_ids)
outputs, losses = model.train(batch[0], batch[1])
acc = accuracy(outputs[0], batch[1])[0]
train_loss += np.sum(losses)
@@ -166,8 +167,7 @@ def main():
print("======== eval epoch {} ========".format(e))
for idx, batch in enumerate(val_loader()):
outputs, losses = model.eval(batch[0], batch[1], device='gpu',
device_ids=device_ids)
outputs, losses = model.eval(batch[0], batch[1])
acc = accuracy(outputs[0], batch[1])[0]
val_loss += np.sum(losses)
@@ -185,14 +185,21 @@ if __name__ == '__main__':
parser.add_argument(
"-e", "--epoch", default=100, type=int, help="number of epoch")
parser.add_argument(
'--lr', '--learning-rate', default=1e-3, type=float, metavar='LR',
'--lr',
'--learning-rate',
default=1e-3,
type=float,
metavar='LR',
help='initial learning rate')
parser.add_argument(
"-b", "--batch_size", default=128, type=int, help="batch size")
parser.add_argument(
"-n", "--num_devices", default=4, type=int, help="number of devices")
"-n", "--num_devices", default=1, type=int, help="number of devices")
parser.add_argument(
"-r", "--resume", default=None, type=str,
"-r",
"--resume",
default=None,
type=str,
help="checkpoint path to resume")
FLAGS = parser.parse_args()
main()
@@ -27,10 +27,12 @@ from paddle.fluid.executor import global_scope
from paddle.fluid.io import is_belong_to_optimizer
from paddle.fluid.dygraph.base import to_variable
__all__ = ['shape_hints', 'Model', 'Loss', 'CrossEntropy']
__all__ = ['Model', 'Loss', 'CrossEntropy', 'Input']
def to_list(value):
if value is None:
return value
if isinstance(value, (list, tuple)):
return value
return [value]
@@ -51,20 +53,14 @@ def extract_args(func):
return inspect.getargspec(func)[0]
def shape_hints(**hints):
assert hints, "hints can not be empty"
assert all(isinstance(h, (list, tuple)) for h in hints.values()), \
"shape hint must be a list or tuple"
class Input(fluid.dygraph.Layer):
def __init__(self, shape=None, dtype=None, name=None):
self.shape = shape
self.dtype = dtype
self.name = name
def wrapper(func):
args = extract_args(func)
invalid = set(hints.keys()) - set(args)
assert not invalid, \
"shape hint for arguments that are not present in forward method" \
+ ": ({})".format(", ".join(invalid))
func.shape_hints = hints
return func
return wrapper
def forward(self):
return fluid.data(self.name, shape=self.shape, dtype=self.dtype)
class Loss(object):
@@ -72,12 +68,6 @@ class Loss(object):
super(Loss, self).__init__()
self.average = average
def infer_shape(self, outputs):
return [o.shape for o in outputs]
def infer_dtype(self, outputs):
return [o.dtype for o in outputs]
def forward(self, outputs, labels):
raise NotImplementedError()
@@ -86,24 +76,21 @@ class Loss(object):
if in_dygraph_mode():
labels = [to_variable(l) for l in labels]
losses = to_list(self.forward(to_list(outputs), labels))
if not self.average:
if self.average:
losses = [fluid.layers.reduce_mean(l) for l in losses]
else:
losses = [fluid.layers.reduce_sum(l) for l in losses]
return losses
return [fluid.layers.reduce_mean(l) for l in losses]
class CrossEntropy(Loss):
def __init__(self):
def __init__(self, average=True):
super(CrossEntropy, self).__init__()
def infer_shape(self, outputs):
return [o.shape[:-1] + (1, ) for o in outputs]
def infer_dtype(self, outputs):
return ['int64' for _ in outputs]
def forward(self, outputs, labels):
return [fluid.layers.cross_entropy(o, l) for o, l in zip(
outputs, labels)]
return [
fluid.layers.cross_entropy(o, l) for o, l in zip(outputs, labels)
]
class StaticGraphAdapter(object):
@@ -116,21 +103,13 @@ class StaticGraphAdapter(object):
self._orig_prog = fluid.default_main_program()
self._label_vars = {} # label variables
self._input_vars = {} # input variables
self._endpoints = {}
self._loss_endpoint = None
self._executor = None
self._progs = {}
self._compiled_progs = {}
self._lazy_load_optimizer = None
# parse shape hints
self._input_desc = OrderedDict([
(n, None) for n in extract_args(self.model.forward) if n != 'self'
])
if hasattr(self.model.forward, 'shape_hints'):
self._input_desc.update(self.model.forward.shape_hints)
@property
def mode(self):
return self.model.mode
@@ -139,21 +118,19 @@ class StaticGraphAdapter(object):
def mode(self, value):
self.model.mode = value
def train(self, inputs, labels, device='CPU', device_ids=None):
assert self.model._optimizer and self.model._loss_function, \
def train(self, inputs, labels=None):
assert self.model._optimizer, \
"model not ready, please call `model.prepare()` first"
self.mode = 'train'
return self._run(inputs, labels, device, device_ids)
return self._run(inputs, labels)
def eval(self, inputs, labels, device='CPU', device_ids=None):
assert self.model._loss_function, \
"model not ready, please call `model.prepare()` first"
def eval(self, inputs, labels=None):
self.mode = 'eval'
return self._run(inputs, labels, device, device_ids)
return self._run(inputs, labels)
def test(self, inputs, device='CPU', device_ids=None):
def test(self, inputs):
self.mode = 'test'
return self._run(inputs, None, device, device_ids)
return self._run(inputs, None)
def parameters(self, *args, **kwargs):
return None
@@ -162,8 +139,10 @@ class StaticGraphAdapter(object):
def _save(state, path):
if not state:
return
state = {k: to_numpy(v) if isinstance(v, Variable) else v
for k, v in state.items()}
state = {
k: to_numpy(v) if isinstance(v, Variable) else v
for k, v in state.items()
}
with open(path, 'wb') as f:
pickle.dump(state, f)
@@ -179,8 +158,10 @@ class StaticGraphAdapter(object):
return
# XXX `optimizer.state_dict()` only work in dygraph mode
optim_path = path + ".pdopt"
optim = {p.name: p for p in filter(
is_belong_to_optimizer, prog.list_vars())}
optim = {
p.name: p
for p in filter(is_belong_to_optimizer, prog.list_vars())
}
if not optim:
return
@@ -219,19 +200,15 @@ class StaticGraphAdapter(object):
if optim_state is None:
return
if self._executor is not None:
self._load_optimizer(optim_state)
else:
self._lazy_load_optimizer = optim_state
self._load_optimizer(optim_state, executor)
def _load_optimizer(self, state):
def _load_optimizer(self, state, executor):
prog = self._progs.get('train', None)
optim = list(filter(is_belong_to_optimizer, prog.list_vars()))
if not optim:
return
fluid.core._create_loaded_parameter(
optim, global_scope(), self._executor._default_executor)
fluid.core._create_loaded_parameter(optim, global_scope(), executor)
converted_state = dict(state)
for var in optim:
@@ -261,14 +238,15 @@ class StaticGraphAdapter(object):
opt_cls_name = self.model._optimizer.__class__.__name__
opt_unq_name = None
for name in self.model._optimizer._accumulators.keys():
accum_name = name if opt_name is None else name[
len(opt_name) + 1:]
accum_name = name if opt_name is None else name[len(
opt_name) + 1:]
for param_name, state_var in self.model._optimizer._accumulators[
name].items():
if opt_unq_name is None:
# can not infer out the exact unique(opt_name),
# thus try to extract rather than generate
for state_key in sorted(state.keys(),
for state_key in sorted(
state.keys(),
key=lambda x: len(x),
reverse=True):
prefix = param_name + "_" + (
@@ -307,21 +285,20 @@ class StaticGraphAdapter(object):
t.set(ndarray, place)
def _run(self, inputs, labels=None, device='CPU', device_ids=None):
def _run(self, inputs, labels=None):
compiled_prog = self._compiled_progs.get(self.mode, None)
assert compiled_prog, \
"Model is not ready, please call `model.prepare()` first"
inputs = to_list(inputs)
if labels is not None:
labels = to_list(labels)
assert len(inputs) == len(self._input_desc), "number of inputs" \
assert len(inputs) == len(self._input_vars[self.mode]), \
"number of inputs" \
+ " does not match number of arguments of `forward` method"
if self._progs.get(self.mode, None) is None:
self._make_program(self._infer_input_vars(inputs))
compiled_prog = self._compile_and_initialize(
self._progs[self.mode], device, device_ids)
feed = {}
input_names = [name for name in self._input_desc.keys()]
input_names = [v.name for v in self._input_vars[self.mode]]
for idx, n in enumerate(input_names):
# train and test may take different arguments
if inputs[idx] is not None:
@@ -333,79 +310,76 @@ class StaticGraphAdapter(object):
endpoints = self._endpoints[self.mode]
fetch_list = endpoints['output'] + endpoints['loss']
num_output = len(endpoints['output'])
out = self._executor.run(
compiled_prog, feed=feed,
out = self._executor.run(compiled_prog,
feed=feed,
fetch_list=fetch_list)
if self.mode == 'test':
return out[:num_output]
else:
return out[:num_output], out[num_output:]
def _make_program(self, inputs):
def prepare(self):
modes = ['train', 'eval', 'test']
for mode in modes:
self._make_program(mode)
self._compile_and_initialize(self._progs[mode], mode)
def _make_program(self, mode):
prog = self._progs.get(mode, None)
if prog is not None:
return
prog = self._orig_prog.clone()
# change inputs to the same var in cloned program
inputs = fluid.layers.utils.map_structure(
lambda var: prog.global_block().var(var.name), inputs)
# NOTE: When defining learning rate scheduling in static-graph, ops to
# increase the global step var and calculate learning rate would be
# prepended into _orig_prog. test program made by `_orig_prog.clone`
# also would include these ops. Thus must prune these ops in test
# program, otherwise the global step would be changed in test.
if self.mode != 'train':
if mode != 'train':
for op in list(prog.global_block().ops):
prog.global_block()._remove_op(0)
if self.mode == 'train' and self.model._optimizer._learning_rate_map:
if mode == 'train' and self.model._optimizer \
and self.model._optimizer._learning_rate_map:
# HACK workaround learning rate map issue
lr_var = self.model._optimizer._learning_rate_map[self._orig_prog]
self.model._optimizer._learning_rate_map[prog] = lr_var
losses = []
with fluid.program_guard(prog, self._startup_prog):
if isinstance(self.model._inputs, dict):
ins = [self.model._inputs[n] \
for n in extract_args(self.model.forward) if n != 'self']
else:
ins = self.model._inputs
lbls = self.model._labels if self.model._labels else []
inputs = [k.forward() for k in to_list(ins)]
labels = [k.forward() for k in to_list(lbls)]
outputs = to_list(self.model.forward(*inputs))
if self.mode != 'test':
label_vars = self._infer_label_vars(outputs)
self._label_vars[self.mode] = label_vars
losses = self.model._loss_function(outputs, label_vars)
if self.mode == 'train':
if mode != 'test':
if self.model._loss_function:
losses = self.model._loss_function(outputs, labels)
if mode == 'train' and self.model._optimizer:
self._loss_endpoint = fluid.layers.sum(losses)
self.model._optimizer.minimize(self._loss_endpoint)
if self.mode != 'train': # clone again to put it in test mode
if mode != 'train': # clone again to put it in test mode
prog = prog.clone(for_test=True)
self._progs[self.mode] = prog
self._endpoints[self.mode] = {
"output": outputs,
"loss": losses
}
def _infer_input_vars(self, inputs):
input_vars = []
for idx, i in enumerate(inputs):
if i is None: # train and test may take different arguments
input_vars.append(None)
continue
ndarray = np.array(i)
name = list(self._input_desc.keys())[idx]
shape = list(self._input_desc.values())[idx]
if shape is None:
shape = (None, ) + ndarray.shape[1:]
input_vars.append(fluid.data(name, shape, ndarray.dtype))
return input_vars
def _infer_label_vars(self, outputs):
shapes = self.model._loss_function.infer_shape(outputs)
dtypes = self.model._loss_function.infer_dtype(outputs)
label_vars = []
for idx, (shape, dtype) in enumerate(zip(shapes, dtypes)):
name = '__label{}'.format(idx)
label_vars.append(fluid.data(name, shape, dtype))
return label_vars
def _compile_and_initialize(self, prog, device='CPU', device_ids=None):
compiled_prog = self._compiled_progs.get(self.mode, None)
self._input_vars[mode] = inputs
self._label_vars[mode] = labels
self._progs[mode] = prog
self._endpoints[mode] = {"output": outputs, "loss": losses}
def _compile_and_initialize(self, prog, mode):
compiled_prog = self._compiled_progs.get(mode, None)
if compiled_prog is not None:
return compiled_prog
places = [device.lower() == 'gpu' and fluid.CUDAPlace(i)
or fluid.CPUPlace() for i in device_ids]
device = self.model._device
device_ids = self.model._device_ids
if device.lower() == 'gpu':
places = fluid.cuda_places(device_ids)
else:
places = fluid.cpu_places(len(device_ids) if device_ids else None)
# XXX *ALL WEIGHTS* should be initialized upon model construction
# even if `forward()` may run different code path for different mode
@@ -423,31 +397,14 @@ class StaticGraphAdapter(object):
startup_prog = self._startup_prog._prune(uninitialized)
self._executor.run(startup_prog)
if self.mode == 'train' and self._lazy_load_optimizer:
self._load_optimizer(self._lazy_load_optimizer)
self._lazy_load_optimizer = None
compiled_prog = fluid.CompiledProgram(prog)
if len(device_ids) > 1:
if len(places) > 1:
loss_name = None
if self.mode == 'train' and self._loss_endpoint is not None:
if mode == 'train' and self._loss_endpoint is not None:
loss_name = self._loss_endpoint.name
share_vars_from = None
if self.mode == 'eval' and 'train' in self._compiled_progs:
share_vars_from = self._compiled_progs['train']
# HACK invalidate eval program if is compiled before train program
# quite hackish, OTOH, it is generally uncommon that the eval
# program will be run before the train program
if self.mode == 'train' and 'eval' in self._compiled_progs:
del self._compiled_progs['eval']
compiled_prog = compiled_prog.with_data_parallel(
loss_name=loss_name, places=places,
share_vars_from=share_vars_from)
self._compiled_progs[self.mode] = compiled_prog
return compiled_prog
loss_name=loss_name, places=places)
self._compiled_progs[mode] = compiled_prog
class DynamicGraphAdapter(object):
@@ -464,12 +421,13 @@ class DynamicGraphAdapter(object):
self.model.mode = value
# TODO multi device in dygraph mode not implemented at present time
def train(self, inputs, labels, device='CPU', device_ids=None):
assert self.model._optimizer and self.model._loss_function, \
def train(self, inputs, labels=None):
assert self.model._optimizer, \
"model not ready, please call `model.prepare()` first"
super(Model, self.model).train()
self.mode = 'train'
inputs = to_list(inputs)
if labels is not None:
labels = to_list(labels)
outputs = self.model.forward(*[to_variable(x) for x in inputs])
losses = self.model._loss_function(outputs, labels)
@@ -480,19 +438,25 @@ class DynamicGraphAdapter(object):
return [to_numpy(o) for o in to_list(outputs)], \
[to_numpy(l) for l in losses]
def eval(self, inputs, labels, device='CPU', device_ids=None):
assert self.model._loss_function, \
"model not ready, please call `model.prepare()` first"
def eval(self, inputs, labels=None):
super(Model, self.model).eval()
self.mode = 'eval'
inputs = to_list(inputs)
if labels is not None:
labels = to_list(labels)
outputs = self.model.forward(*[to_variable(x) for x in inputs])
if self.model._loss_function:
losses = self.model._loss_function(outputs, labels)
else:
losses = []
# To be consistent with static graph
# return empty loss if loss_function is None
return [to_numpy(o) for o in to_list(outputs)], \
[to_numpy(l) for l in losses]
def test(self, inputs, device='CPU', device_ids=None):
def test(self, inputs):
super(Model, self.model).eval()
self.mode = 'test'
inputs = [to_variable(x) for x in to_list(inputs)]
@@ -528,9 +492,8 @@ class DynamicGraphAdapter(object):
opt_cls_name = self.model._optimizer.__class__.__name__
opt_name = opt_unq_name[:opt_unq_name.rfind("_")] # remove suffix idx
param_names = [param.name for param in self.model.parameters()]
for var_name, state_var in sorted(optim.items(),
key=lambda x: len(x[0]),
reverse=True):
for var_name, state_var in sorted(
optim.items(), key=lambda x: len(x[0]), reverse=True):
if var_name in ["@LR_DECAY_COUNTER@", "global_step"]:
# NOTE: dygraph saved global_step is 1 larger than that in
# static-graph, since the time of global_step to increase is
@@ -564,11 +527,21 @@ class DynamicGraphAdapter(object):
class Model(fluid.dygraph.Layer):
"""
FIXME: add more comments and usage
"""
def __init__(self):
super(Model, self).__init__(self.__class__.__name__)
self.mode = 'train'
self._inputs = None
self._labels = None
self._loss_function = None
self._loss_weights = None
self._loss = None
self._optimizer = None
self._device = None
self._device_ids = None
self._optimizer = None
if in_dygraph_mode():
self._adapter = DynamicGraphAdapter(self)
@@ -590,11 +563,65 @@ class Model(fluid.dygraph.Layer):
def load(self, *args, **kwargs):
return self._adapter.load(*args, **kwargs)
def prepare(self, optimizer, loss_function):
def prepare(self,
optimizer=None,
loss_function=None,
inputs=None,
labels=None,
device=None,
device_ids=None):
"""
FIXME: add comments
Args:
optimizer (Optimizer|None): optimizer must be set in training
and should be an Optimizer instance. It can be None in eval
and test mode.
loss_function (Loss|None): loss function must be set in training
and should be a Loss instance. It can be None when there is
no loss.
inputs (Input|list|dict|None): inputs, the entry points of the network,
could be an Input layer, a list of Input layers,
a dict (name: Input), or None. For static graph,
inputs must be set. For dynamic graph, it could be None.
labels (Input|list|None): labels, the entry points of the network,
could be an Input layer, a list of Input layers, or None.
For static graph, labels must be set if loss_function is set in
Model.prepare(). Otherwise, it could be None.
device (str|None): specify device type, 'CPU' or 'GPU'.
If None, the device is selected automatically according to the
installed package (GPU if compiled with CUDA, otherwise CPU).
device_ids (list[int]|None): specify device indices. If None,
the available devices are obtained from environment variables
when the model is executed: for GPU, the available device IDs are
taken from FLAGS_selected_gpus or CUDA_VISIBLE_DEVICES; for CPU,
the number of available CPUs is taken from CPU_NUM (for example,
export CPU_NUM=4). If CPU_NUM is not set, the executor sets it to 1.
The default is None.
"""
self._optimizer = optimizer
assert isinstance(loss_function, Loss), \
"'loss_function' must be sub classes of 'Loss'"
if loss_function:
if not isinstance(loss_function, Loss):
raise TypeError(
"'loss_function' must be sub classes of 'Loss'")
self._loss_function = loss_function
if not in_dygraph_mode():
if not isinstance(inputs, (list, dict, Input)):
raise TypeError(
"'inputs' must be list or dict in static graph mode")
if loss_function and not isinstance(labels, (list, Input)):
raise TypeError("'labels' must be list in static graph mode")
self._inputs = inputs
self._labels = labels
self._device = device
if device is None:
self._device = 'GPU' if fluid.is_compiled_with_cuda() else 'CPU'
self._device_ids = device_ids
if not in_dygraph_mode():
self._adapter.prepare()
def parameters(self, *args, **kwargs):
return self._adapter.parameters(*args, **kwargs)
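
For completeness, a hypothetical static-graph run of the same API (again a sketch, assuming this PR's `model.py` is importable; `SoftmaxRegression`, the shapes, and the data are made up for illustration). In static graph mode `inputs`, and `labels` when a loss function is given, must be passed to `prepare()`, which then builds and compiles the train/eval/test programs through `StaticGraphAdapter.prepare()`:

```python
import numpy as np
from paddle import fluid
from paddle.fluid.dygraph.nn import Linear
from paddle.fluid.optimizer import Momentum
from model import Model, CrossEntropy, Input


class SoftmaxRegression(Model):  # hypothetical toy model, not part of the diff
    def __init__(self):
        super(SoftmaxRegression, self).__init__()
        self._fc = Linear(784, 10, act='softmax')

    def forward(self, x):
        return self._fc(x)


# No dygraph guard: the Model falls back to StaticGraphAdapter.
model = SoftmaxRegression()
# parameters() returns None in static mode; the optimizer then updates
# all trainable parameters in the program.
optim = Momentum(learning_rate=1e-3, momentum=0.9,
                 parameter_list=model.parameters())
inputs = [Input([None, 784], 'float32', name='x')]
labels = [Input([None, 1], 'int64', name='label')]
# prepare() builds the programs, runs the startup program and compiles them;
# device/device_ids are fixed here instead of being passed to train()/eval().
model.prepare(optim, CrossEntropy(), inputs, labels, device='CPU')

x = np.random.random((8, 784)).astype('float32')
y = np.random.randint(0, 10, (8, 1)).astype('int64')
outputs, losses = model.train(x, y)  # feeds numpy arrays, fetches numpy results
```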