Commit 59f12446 authored by: D dengkaipeng

merge master

- repo: https://github.com/PaddlePaddle/mirrors-yapf.git
  sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
  hooks:
  - id: yapf
    files: \.py$
- repo: https://github.com/pre-commit/pre-commit-hooks
  sha: a11d9314b22d8f8c7556443875b731ef05965464
  hooks:
  - id: check-merge-conflict
  - id: check-symlinks
  - id: detect-private-key
    files: (?!.*paddle)^.*$
  - id: end-of-file-fixer
    files: \.(md|yml)$
  - id: trailing-whitespace
    files: \.(md|yml)$
- repo: https://github.com/Lucas-C/pre-commit-hooks
  sha: v1.0.1
  hooks:
  - id: forbid-crlf
    files: \.(md|yml)$
  - id: remove-crlf
    files: \.(md|yml)$
  - id: forbid-tabs
    files: \.(md|yml)$
  - id: remove-tabs
    files: \.(md|yml)$
@@ -26,7 +26,7 @@ from paddle import fluid
from paddle.fluid.optimizer import Momentum
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
from model import Model, CrossEntropy
from model import Model, CrossEntropy, Input
from metrics import Accuracy
@@ -79,7 +79,6 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
class MNIST(Model):
def __init__(self):
super(MNIST, self).__init__()
self._simple_img_conv_pool_1 = SimpleImgConvPool(
1, 20, 5, 2, 2, act="relu")
@@ -89,12 +88,13 @@ class MNIST(Model):
pool_2_shape = 50 * 4 * 4
SIZE = 10
scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5
self._fc = Linear(800,
10,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=scale)),
act="softmax")
self._fc = Linear(
800,
10,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=scale)),
act="softmax")
def forward(self, inputs):
x = self._simple_img_conv_pool_1(inputs)
@@ -138,13 +138,15 @@ def main():
paddle.batch(paddle.dataset.mnist.test(),
batch_size=FLAGS.batch_size, drop_last=True), 1, 1)
device_ids = list(range(FLAGS.num_devices))
with guard:
model = MNIST()
optim = Momentum(learning_rate=FLAGS.lr, momentum=.9,
parameter_list=model.parameters())
model.prepare(optim, CrossEntropy(), metrics=Accuracy(topk=(1, 2)))
optim = Momentum(
learning_rate=FLAGS.lr,
momentum=.9,
parameter_list=model.parameters())
inputs = [Input([None, 1, 28, 28], 'float32', name='image')]
labels = [Input([None, 1], 'int64', name='label')]
model.prepare(optim, CrossEntropy(), Accuracy(topk=(1, 2)), inputs, labels)
if FLAGS.resume is not None:
model.load(FLAGS.resume)
@@ -153,8 +155,7 @@ def main():
val_loss = 0.0
print("======== train epoch {} ========".format(e))
for idx, batch in enumerate(train_loader()):
losses, metrics = model.train(batch[0], batch[1], device='gpu',
device_ids=device_ids)
losses, metrics = model.train(batch[0], batch[1])
train_loss += np.sum(losses)
if idx % 10 == 0:
@@ -167,8 +168,7 @@ def main():
print("======== eval epoch {} ========".format(e))
for idx, batch in enumerate(val_loader()):
losses, metrics = model.eval(batch[0], batch[1], device='gpu',
device_ids=device_ids)
losses, metrics = model.eval(batch[0], batch[1])
val_loss += np.sum(losses)
if idx % 10 == 0:
@@ -188,14 +188,21 @@ if __name__ == '__main__':
parser.add_argument(
"-e", "--epoch", default=100, type=int, help="number of epoch")
parser.add_argument(
'--lr', '--learning-rate', default=1e-3, type=float, metavar='LR',
'--lr',
'--learning-rate',
default=1e-3,
type=float,
metavar='LR',
help='initial learning rate')
parser.add_argument(
"-b", "--batch_size", default=128, type=int, help="batch size")
parser.add_argument(
"-n", "--num_devices", default=4, type=int, help="number of devices")
"-n", "--num_devices", default=1, type=int, help="number of devices")
parser.add_argument(
"-r", "--resume", default=None, type=str,
"-r",
"--resume",
default=None,
type=str,
help="checkpoint path to resume")
FLAGS = parser.parse_args()
main()
@@ -28,10 +28,12 @@ from paddle.fluid.io import is_belong_to_optimizer
from paddle.fluid.dygraph.base import to_variable
from metrics import Metric
__all__ = ['shape_hints', 'Model', 'Loss', 'CrossEntropy']
__all__ = ['Model', 'Loss', 'CrossEntropy', 'Input']
def to_list(value):
if value is None:
return value
if isinstance(value, (list, tuple)):
return value
return [value]
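For reference, a minimal sketch of the helper's behavior (illustrative assertions, not part of this change):

    assert to_list(None) is None
    assert to_list((1, 2)) == (1, 2)        # lists/tuples pass through unchanged
    assert to_list('sample') == ['sample']  # any other value is wrapped in a list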
@@ -72,20 +74,14 @@ def extract_args(func):
return inspect.getargspec(func)[0]
def shape_hints(**hints):
assert hints, "hints can not be empty"
assert all(isinstance(h, (list, tuple)) for h in hints.values()), \
"shape hint must be a list or tuple"
class Input(fluid.dygraph.Layer):
    def __init__(self, shape=None, dtype=None, name=None):
        super(Input, self).__init__()
        self.shape = shape
        self.dtype = dtype
        self.name = name
def wrapper(func):
args = extract_args(func)
invalid = set(hints.keys()) - set(args)
assert not invalid, \
"shape hint for arguments that are not present in forward method" \
+ ": ({})".format(", ".join(invalid))
func.shape_hints = hints
return func
return wrapper
def forward(self):
return fluid.data(self.name, shape=self.shape, dtype=self.dtype)
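A minimal usage sketch of the Input descriptor above, mirroring the MNIST sample later in this change; in the static graph adapter, forward() materializes the corresponding feed variable through fluid.data:

    image = Input([None, 1, 28, 28], 'float32', name='image')
    label = Input([None, 1], 'int64', name='label')
    # image.forward() yields fluid.data('image', shape=[None, 1, 28, 28], dtype='float32')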
class Loss(object):
@@ -93,12 +89,6 @@ class Loss(object):
super(Loss, self).__init__()
self.average = average
def infer_shape(self, outputs):
return [o.shape for o in outputs]
def infer_dtype(self, outputs):
return [o.dtype for o in outputs]
def forward(self, outputs, labels):
raise NotImplementedError()
@@ -107,24 +97,21 @@ class Loss(object):
if in_dygraph_mode():
labels = [to_variable(l) for l in labels]
losses = to_list(self.forward(to_list(outputs), labels))
if not self.average:
return losses
return [fluid.layers.reduce_mean(l) for l in losses]
if self.average:
losses = [fluid.layers.reduce_mean(l) for l in losses]
else:
losses = [fluid.layers.reduce_sum(l) for l in losses]
return losses
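A short sketch of the reduction above, assuming the average flag is forwarded by subclasses: with average=True each loss term is reduced with reduce_mean, otherwise with reduce_sum:

    mean_loss = CrossEntropy()                 # average=True, reduce_mean per loss term
    summed_loss = CrossEntropy(average=False)  # reduce_sum per loss term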
class CrossEntropy(Loss):
def __init__(self):
    def __init__(self, average=True):
        super(CrossEntropy, self).__init__(average)
def infer_shape(self, outputs):
return [o.shape[:-1] + (1, ) for o in outputs]
def infer_dtype(self, outputs):
return ['int64' for _ in outputs]
def forward(self, outputs, labels):
return [fluid.layers.cross_entropy(o, l) for o, l in zip(
outputs, labels)]
return [
fluid.layers.cross_entropy(o, l) for o, l in zip(outputs, labels)
]
class StaticGraphAdapter(object):
@@ -137,21 +124,13 @@ class StaticGraphAdapter(object):
self._orig_prog = fluid.default_main_program()
self._label_vars = {} # label variables
        self._input_vars = {}  # input variables
self._endpoints = {}
self._loss_endpoint = None
self._executor = None
self._progs = {}
self._compiled_progs = {}
self._lazy_load_optimizer = None
# parse shape hints
self._input_desc = OrderedDict([
(n, None) for n in extract_args(self.model.forward) if n != 'self'
])
if hasattr(self.model.forward, 'shape_hints'):
self._input_desc.update(self.model.forward.shape_hints)
@property
def mode(self):
return self.model.mode
@@ -160,21 +139,19 @@ class StaticGraphAdapter(object):
def mode(self, value):
self.model.mode = value
def train(self, inputs, labels, device='CPU', device_ids=None):
assert self.model._optimizer and self.model._loss_function, \
def train(self, inputs, labels=None):
assert self.model._optimizer, \
"model not ready, please call `model.prepare()` first"
self.mode = 'train'
return self._run(inputs, labels, device, device_ids)
return self._run(inputs, labels)
def eval(self, inputs, labels, device='CPU', device_ids=None):
assert self.model._loss_function, \
"model not ready, please call `model.prepare()` first"
def eval(self, inputs, labels=None):
self.mode = 'eval'
return self._run(inputs, labels, device, device_ids)
return self._run(inputs, labels)
def test(self, inputs, device='CPU', device_ids=None):
def test(self, inputs):
self.mode = 'test'
return self._run(inputs, None, device, device_ids)
return self._run(inputs, None)
def parameters(self, *args, **kwargs):
return None
@@ -183,13 +160,18 @@ class StaticGraphAdapter(object):
def _save(state, path):
if not state:
return
state = {k: to_numpy(v) if isinstance(v, Variable) else v
for k, v in state.items()}
state = {
k: to_numpy(v) if isinstance(v, Variable) else v
for k, v in state.items()
}
with open(path, 'wb') as f:
pickle.dump(state, f)
base = os.path.basename(path)
assert base != "", "path should be of 'dirname/filename' format"
dir_name = os.path.dirname(path)
if dir_name and not os.path.exists(dir_name):
os.makedirs(dir_name)
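        # Illustration (assumed usage): save('checkpoints/mnist') creates the
        # 'checkpoints' directory if needed, then writes parameters to
        # 'checkpoints/mnist.pdparams' and, for the train program, optimizer
        # state to 'checkpoints/mnist.pdopt'.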
param_path = path + ".pdparams"
_save(self.model.state_dict(), param_path)
prog = self._progs.get('train', None)
@@ -197,13 +179,13 @@ class StaticGraphAdapter(object):
return
        # XXX `optimizer.state_dict()` only works in dygraph mode
optim_path = path + ".pdopt"
optim = {p.name: p for p in filter(
is_belong_to_optimizer, prog.list_vars())}
optim = {
p.name: p
for p in filter(is_belong_to_optimizer, prog.list_vars())
}
if not optim:
return
# HACK this is contrived, optimizer state is not the same for
# static/dynamic graph mode
optim['__static_graph_only__'] = True
_save(optim, optim_path)
def load(self, path):
@@ -238,27 +220,77 @@ class StaticGraphAdapter(object):
optim_state = _load(optim_path)
if optim_state is None:
return
assert '__static_graph_only__' in optim_state, \
"optimizer saved in dygraph mode is not usable in static graph"
if self._executor is not None:
self._load_optimizer(optim_state)
else:
self._lazy_load_optimizer = optim_state
self._load_optimizer(optim_state, executor)
def _load_optimizer(self, state):
def _load_optimizer(self, state, executor):
prog = self._progs.get('train', None)
optim = list(filter(is_belong_to_optimizer, prog.list_vars()))
if not optim:
return
fluid.core._create_loaded_parameter(
optim, global_scope(), self._executor._default_executor)
fluid.core._create_loaded_parameter(optim, global_scope(), executor)
converted_state = dict(state)
for var in optim:
assert var.name in state, \
if var.name in ["@LR_DECAY_COUNTER@", "global_step"]:
                # When using a learning rate scheduler, dygraph saves the
                # global step var as "global_step", while static graph keeps
                # a state var named "@LR_DECAY_COUNTER@".
                # NOTE: the dygraph-saved global_step is 1 larger than the
                # static-graph one, since the step is incremented at a
                # different point in the two modes.
state_val = (
np.array(converted_state.pop("global_step")) - 1
) if "global_step" in converted_state else converted_state.pop(
"@LR_DECAY_COUNTER@", None)
if state_val is not None:
converted_state[var.name] = state_val
elif var.name.startswith("learning_rate_"):
                # When using a static learning rate, static graph creates a
                # persistable var named by unique_name.generate("learning_rate"),
                # but dygraph does not save it.
if var.name not in state: continue
else:
# moment and other accumulators
if var.name not in converted_state:
# try to convert from dygraph name
opt_name = self.model._optimizer._name
opt_cls_name = self.model._optimizer.__class__.__name__
opt_unq_name = None
for name in self.model._optimizer._accumulators.keys():
accum_name = name if opt_name is None else name[len(
opt_name) + 1:]
for param_name, state_var in self.model._optimizer._accumulators[
name].items():
if opt_unq_name is None:
                                # cannot infer the exact unique optimizer
                                # name, so extract it rather than generate it
for state_key in sorted(
state.keys(),
key=lambda x: len(x),
reverse=True):
prefix = param_name + "_" + (
opt_cls_name if opt_name is None else
opt_name) + "_"
if state_key.startswith(prefix):
prefix_offset = state_key[len(
prefix):].find("_") + len(prefix)
opt_unq_name = state_key[len(
param_name + "_"):prefix_offset]
                                # TODO: assert that opt_unq_name was inferred
                                # dygraph state name is built as
                                #   param.name + "_" + unique(opt_name) + "_" + accum_name
                                # and always ends with "_0" because of the
                                # unique optimizer._name
dy_state_name = (param_name + "_" + opt_unq_name +
"_" + accum_name + "_0")
converted_state[
state_var.name] = converted_state.pop(
dy_state_name)
assert var.name in converted_state, \
"variable [{}] is not in optimizer state file".format(var.name)
self._set_var(var, state[var.name])
self._set_var(var, converted_state[var.name])
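        # Illustrative example of the conversion above (assumed values): a
        # dygraph checkpoint holding global_step = 101 restores the static
        # graph var @LR_DECAY_COUNTER@ as 100, matching the off-by-one noted
        # in the comment.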
def _set_var(self, var, ndarray):
t = global_scope().find_var(var.name).get_tensor()
@@ -274,21 +306,20 @@ class StaticGraphAdapter(object):
t.set(ndarray, place)
def _run(self, inputs, labels=None, device='CPU', device_ids=None):
def _run(self, inputs, labels=None):
compiled_prog = self._compiled_progs.get(self.mode, None)
assert compiled_prog, \
"Model is not ready, please call `model.prepare()` first"
inputs = to_list(inputs)
if labels is not None:
labels = to_list(labels)
assert len(inputs) == len(self._input_desc), "number of inputs" \
assert len(inputs) == len(self._input_vars[self.mode]), \
"number of inputs" \
+ " does not match number of arguments of `forward` method"
if self._progs.get(self.mode, None) is None:
self._make_program(self._infer_input_vars(inputs))
compiled_prog = self._compile_and_initialize(
self._progs[self.mode], device, device_ids)
feed = {}
input_names = [name for name in self._input_desc.keys()]
input_names = [v.name for v in self._input_vars[self.mode]]
for idx, n in enumerate(input_names):
# train and test may take different arguments
if inputs[idx] is not None:
@@ -319,64 +350,71 @@ class StaticGraphAdapter(object):
metrics.append(metric.update(*state))
return (losses, metrics) if len(metrics) > 0 else losses
def _make_program(self, inputs):
def prepare(self):
modes = ['train', 'eval', 'test']
for mode in modes:
self._make_program(mode)
self._compile_and_initialize(self._progs[mode], mode)
def _make_program(self, mode):
prog = self._progs.get(mode, None)
if prog is not None:
return
prog = self._orig_prog.clone()
if self.mode == 'train' and self.model._optimizer._learning_rate_map:
        # NOTE: when a learning rate scheduler is defined in static graph, ops
        # that increase the global step var and compute the learning rate are
        # prepended into _orig_prog. A test program made by `_orig_prog.clone`
        # would also include these ops, so they must be pruned from the test
        # program; otherwise the global step would be changed during testing.
if mode != 'train':
for op in list(prog.global_block().ops):
prog.global_block()._remove_op(0)
if mode == 'train' and self.model._optimizer \
and self.model._optimizer._learning_rate_map:
# HACK workaround learning rate map issue
lr_var = self.model._optimizer._learning_rate_map[self._orig_prog]
self.model._optimizer._learning_rate_map[prog] = lr_var
losses = []
metrics = []
with fluid.program_guard(prog, self._startup_prog):
if isinstance(self.model._inputs, dict):
ins = [self.model._inputs[n] \
for n in extract_args(self.model.forward) if n != 'self']
else:
ins = self.model._inputs
lbls = self.model._labels if self.model._labels else []
inputs = [k.forward() for k in to_list(ins)]
labels = [k.forward() for k in to_list(lbls)]
outputs = to_list(self.model.forward(*inputs))
if self.mode != 'test':
label_vars = self._infer_label_vars(outputs)
self._label_vars[self.mode] = label_vars
losses = self.model._loss_function(outputs, label_vars)
metrics = []
for metric in self.model._metrics:
metrics.append(to_list(metric.add_metric_op(outputs, label_vars)))
if self.mode == 'train':
if mode != 'test':
if self.model._loss_function:
losses = self.model._loss_function(outputs, labels)
for metric in self.model._metrics:
metrics.append(to_list(metric.add_metric_op(outputs, labels)))
if mode == 'train' and self.model._optimizer:
self._loss_endpoint = fluid.layers.sum(losses)
self.model._optimizer.minimize(self._loss_endpoint)
if self.mode != 'train': # clone again to put it in test mode
if mode != 'train': # clone again to put it in test mode
prog = prog.clone(for_test=True)
self._progs[self.mode] = prog
self._endpoints[self.mode] = {
"output": outputs,
"loss": losses,
"metric": metrics,
}
def _infer_input_vars(self, inputs):
input_vars = []
for idx, i in enumerate(inputs):
if i is None: # train and test may take different arguments
input_vars.append(None)
continue
ndarray = np.array(i)
name = list(self._input_desc.keys())[idx]
shape = list(self._input_desc.values())[idx]
if shape is None:
shape = (None, ) + ndarray.shape[1:]
input_vars.append(fluid.data(name, shape, ndarray.dtype))
return input_vars
def _infer_label_vars(self, outputs):
shapes = self.model._loss_function.infer_shape(outputs)
dtypes = self.model._loss_function.infer_dtype(outputs)
label_vars = []
for idx, (shape, dtype) in enumerate(zip(shapes, dtypes)):
name = '__label{}'.format(idx)
label_vars.append(fluid.data(name, shape, dtype))
return label_vars
def _compile_and_initialize(self, prog, device='CPU', device_ids=None):
compiled_prog = self._compiled_progs.get(self.mode, None)
self._input_vars[mode] = inputs
self._label_vars[mode] = labels
self._progs[mode] = prog
self._endpoints[mode] = {"output": outputs, "loss": losses, "metric": metrics}
def _compile_and_initialize(self, prog, mode):
compiled_prog = self._compiled_progs.get(mode, None)
if compiled_prog is not None:
return compiled_prog
places = [device.lower() == 'gpu' and fluid.CUDAPlace(i)
or fluid.CPUPlace() for i in device_ids]
device = self.model._device
device_ids = self.model._device_ids
if device.lower() == 'gpu':
places = fluid.cuda_places(device_ids)
else:
places = fluid.cpu_places(len(device_ids) if device_ids else None)
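        # Illustration (assumed behavior of the branch above):
        #   device='GPU', device_ids=[0, 1] -> [fluid.CUDAPlace(0), fluid.CUDAPlace(1)]
        #   device='GPU', device_ids=None   -> all devices visible through
        #                                      FLAGS_selected_gpus / CUDA_VISIBLE_DEVICES
        #   device='CPU', device_ids=None   -> fluid.cpu_places(None), with the count
        #                                      read from the CPU_NUM environment variable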
# XXX *ALL WEIGHTS* should be initialized upon model construction
# even if `forward()` may run different code path for different mode
@@ -394,31 +432,14 @@ class StaticGraphAdapter(object):
startup_prog = self._startup_prog._prune(uninitialized)
self._executor.run(startup_prog)
if self.mode == 'train' and self._lazy_load_optimizer:
self._load_optimizer(self._lazy_load_optimizer)
self._lazy_load_optimizer = None
compiled_prog = fluid.CompiledProgram(prog)
if len(device_ids) > 1:
if len(places) > 1:
loss_name = None
if self.mode == 'train' and self._loss_endpoint is not None:
if mode == 'train' and self._loss_endpoint is not None:
loss_name = self._loss_endpoint.name
share_vars_from = None
if self.mode == 'eval' and 'train' in self._compiled_progs:
share_vars_from = self._compiled_progs['train']
# HACK invalidate eval program if is compiled before train program
# quite hackish, OTOH, it is generally uncommon that the eval
# program will be run before the train program
if self.mode == 'train' and 'eval' in self._compiled_progs:
del self._compiled_progs['eval']
compiled_prog = compiled_prog.with_data_parallel(
loss_name=loss_name, places=places,
share_vars_from=share_vars_from)
self._compiled_progs[self.mode] = compiled_prog
return compiled_prog
loss_name=loss_name, places=places)
self._compiled_progs[mode] = compiled_prog
class DynamicGraphAdapter(object):
@@ -435,13 +456,14 @@ class DynamicGraphAdapter(object):
self.model.mode = value
    # TODO: multi-device support in dygraph mode is not implemented yet
def train(self, inputs, labels, device='CPU', device_ids=None):
assert self.model._optimizer and self.model._loss_function, \
def train(self, inputs, labels=None):
assert self.model._optimizer, \
"model not ready, please call `model.prepare()` first"
super(Model, self.model).train()
self.mode = 'train'
inputs = to_list(inputs)
labels = to_list(labels)
if labels is not None:
labels = to_list(labels)
outputs = to_list(self.model.forward(*[to_variable(x) for x in inputs]))
losses = self.model._loss_function(outputs, labels)
final_loss = fluid.layers.sum(losses)
@@ -456,24 +478,31 @@ class DynamicGraphAdapter(object):
return ([to_numpy(l) for l in losses], metrics) \
if len(metrics) > 0 else [to_numpy(l) for l in losses]
def eval(self, inputs, labels, device='CPU', device_ids=None):
assert self.model._loss_function, \
"model not ready, please call `model.prepare()` first"
def eval(self, inputs, labels=None):
super(Model, self.model).eval()
self.mode = 'eval'
inputs = to_list(inputs)
labels = to_list(labels)
if labels is not None:
labels = to_list(labels)
outputs = to_list(self.model.forward(*[to_variable(x) for x in inputs]))
losses = self.model._loss_function(outputs, labels)
if self.model._loss_function:
losses = self.model._loss_function(outputs, labels)
else:
losses = []
metrics = []
for metric in self.model._metrics:
metric_outs = metric.add_metric_op(outputs, [to_variable(l) for l in labels])
m = metric.update(*[to_numpy(m) for m in to_list(metric_outs)])
metrics.append(m)
# To be consistent with static graph
# return empty loss if loss_function is None
return ([to_numpy(l) for l in losses], metrics) \
if len(metrics) > 0 else [to_numpy(l) for l in losses]
def test(self, inputs, device='CPU', device_ids=None):
def test(self, inputs):
super(Model, self.model).eval()
self.mode = 'test'
inputs = [to_variable(x) for x in to_list(inputs)]
@@ -497,15 +526,68 @@ class DynamicGraphAdapter(object):
self.model.set_dict(params)
if self.model._optimizer is None or optim is None:
return
self.model._optimizer.set_dict(optim)
        # If the optimizer's set_dict is called before its state vars have
        # been created (i.e. set_dict before minimize), the state is kept in
        # optimizer._accumulators_holder and loaded lazily.
        # To handle loading from static-graph saved states, extend the state
        # dict to include keys named according to dygraph naming rules.
# TODO: if len(self.model._optimizer._accumulators) > 0
converted_state = dict(optim)
opt_unq_name = self.model._optimizer._name
opt_cls_name = self.model._optimizer.__class__.__name__
opt_name = opt_unq_name[:opt_unq_name.rfind("_")] # remove suffix idx
param_names = [param.name for param in self.model.parameters()]
for var_name, state_var in sorted(
optim.items(), key=lambda x: len(x[0]), reverse=True):
if var_name in ["@LR_DECAY_COUNTER@", "global_step"]:
# NOTE: dygraph saved global_step is 1 larger than that in
# static-graph, since the time of global_step to increase is
# different.
if var_name == "@LR_DECAY_COUNTER@":
converted_state["global_step"] = np.array(
converted_state.pop("@LR_DECAY_COUNTER@")) + 1
else:
# moment and other accumulators
# extend state dict to include promising dygraph names
for param_name in param_names:
if var_name.startswith(param_name + "_" + opt_name):
# when init optimizer with name
accum_name = var_name[len(param_name + "_" + opt_name +
"_"):]
elif var_name.startswith(param_name +
"_") and opt_name == opt_cls_name:
# when init optimizer without name
accum_name = var_name[len(param_name + "_"):]
else:
continue
# remove suffix idx
accum_name = accum_name[:accum_name.rfind("_")]
# state names always end with "_0" in dygraph because of the
# unique optimizer._name
dy_state_name = (param_name + "_" + opt_unq_name + "_" +
accum_name + "_0")
converted_state[dy_state_name] = state_var
self.model._optimizer.set_dict(converted_state)
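To make the key mapping above concrete, a hypothetical example (names are illustrative only, derived from the construction in this method):

    # param_name   = 'fc_0.w_0'
    # opt_unq_name = 'Momentum_0'   # optimizer created without an explicit name
    # accum_name   = 'velocity'
    # static-graph checkpoint key: 'fc_0.w_0_velocity_0'
    # mirrored dygraph key:        'fc_0.w_0_Momentum_0_velocity_0'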
class Model(fluid.dygraph.Layer):
"""
FIXME: add more comments and usage
"""
def __init__(self):
super(Model, self).__init__(self.__class__.__name__)
self.mode = 'train'
self._inputs = None
self._labels = None
self._loss_function = None
self._loss_weights = None
self._loss = None
self._optimizer = None
self._device = None
self._device_ids = None
self._optimizer = None
if in_dygraph_mode():
self._adapter = DynamicGraphAdapter(self)
@@ -527,15 +609,75 @@ class Model(fluid.dygraph.Layer):
def load(self, *args, **kwargs):
return self._adapter.load(*args, **kwargs)
def prepare(self, optimizer, loss_function, metrics=[]):
def prepare(self,
optimizer=None,
loss_function=None,
metrics=None,
inputs=None,
labels=None,
device=None,
device_ids=None):
"""
FIXME: add comments
Args:
            optimizer (Optimizer|None): the optimizer must be set for
                training and should be an Optimizer instance. It can be None
                in eval and test mode.
            loss_function (Loss|None): the loss function must be set for
                training and should be a Loss instance. It can be None when
                there is no loss.
            metrics (Metric|list of Metric|None): if set, all metrics will
                be computed and output in train/eval mode.
            inputs (Input|list|dict|None): the entry points of the network,
                could be an Input layer, a list of Input layers, a dict
                (name: Input), or None. For static graph, inputs must be
                set. For dynamic graph, it could be None.
            labels (Input|list|None): the label entry points of the network,
                could be an Input layer, a list of Input layers, or None.
                For static graph, if loss_function is set in Model.prepare(),
                labels must be set as well. Otherwise, it could be None.
            device (str|None): specify the device type, 'CPU' or 'GPU'.
                If None, the device is selected automatically according to
                the installed package version.
            device_ids (list[int]|None): specify the device indices. If None,
                the available devices are determined from environment
                variables when the model is executed: for GPU, the currently
                available device IDs are read from FLAGS_selected_gpus or
                CUDA_VISIBLE_DEVICES; for CPU, the number of usable CPUs is
                read from CPU_NUM (e.g. export CPU_NUM=4; if the variable is
                not set, the executor sets it to 1).
                The default is None.
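            Example (illustrative, mirroring the MNIST sample in this change,
            static graph mode):

                inputs = [Input([None, 1, 28, 28], 'float32', name='image')]
                labels = [Input([None, 1], 'int64', name='label')]
                model.prepare(
                    Momentum(learning_rate=1e-3, momentum=0.9,
                             parameter_list=model.parameters()),
                    CrossEntropy(),
                    metrics=Accuracy(topk=(1, 2)),
                    inputs=inputs,
                    labels=labels)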
"""
self._optimizer = optimizer
assert isinstance(loss_function, Loss), \
"'loss_function' must be sub classes of 'Loss'"
if loss_function:
if not isinstance(loss_function, Loss):
raise TypeError(
"'loss_function' must be sub classes of 'Loss'")
self._loss_function = loss_function
if not in_dygraph_mode():
            if not isinstance(inputs, (list, dict, Input)):
                raise TypeError(
                    "'inputs' must be a list, dict or Input in static graph mode")
            if loss_function and not isinstance(labels, (list, Input)):
                raise TypeError(
                    "'labels' must be a list or Input in static graph mode")
metrics = metrics or []
for metric in to_list(metrics):
assert isinstance(metric, Metric), \
"{} is not sub class of Metric".format(metric.__class__.__name__)
self._metrics = to_list(metrics)
self._inputs = inputs
self._labels = labels
self._device = device
if device is None:
self._device = 'GPU' if fluid.is_compiled_with_cuda() else 'CPU'
self._device_ids = device_ids
if not in_dygraph_mode():
self._adapter.prepare()
def parameters(self, *args, **kwargs):
return self._adapter.parameters(*args, **kwargs)
@@ -33,7 +33,7 @@ from paddle.fluid.dygraph.nn import Conv2D
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.regularizer import L2Decay
from model import Model, Loss, shape_hints
from model import Model, Loss, Input
from resnet import ResNet, ConvBNLayer
import logging
@@ -152,7 +152,6 @@ class YOLOv3(Model):
act='leaky_relu'))
self.route_blocks.append(route)
@shape_hints(inputs=[None, 3, None, None], img_info=[None, 3])
def forward(self, inputs, img_info):
outputs = []
boxes = []
@@ -208,10 +207,9 @@ class YOLOv3(Model):
class YoloLoss(Loss):
def __init__(self, num_classes=80, num_max_boxes=50):
def __init__(self, num_classes=80):
super(YoloLoss, self).__init__()
self.num_classes = num_classes
self.num_max_boxes = num_max_boxes
self.ignore_thresh = 0.7
self.anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45,
59, 119, 116, 90, 156, 198, 373, 326]
@@ -240,16 +238,6 @@ class YoloLoss(Loss):
downsample //= 2
return losses
def infer_shape(self, _):
return [
[None, self.num_max_boxes, 4],
[None, self.num_max_boxes],
[None, self.num_max_boxes]
]
def infer_dtype(self, _):
return ['float32', 'int32', 'float32']
def make_optimizer(parameter_list=None):
base_lr = FLAGS.lr
@@ -470,8 +458,7 @@ def run(model, loader, mode='train'):
start = time.time()
for idx, batch in enumerate(loader()):
losses, _ = getattr(model, mode)(
batch[0], batch[1], device='gpu', device_ids=device_ids)
losses = getattr(model, mode)(batch[0], batch[1])
total_loss += np.sum(losses)
if idx > 1: # skip first two steps
@@ -521,7 +508,8 @@ def main():
os.mkdir('yolo_checkpoints')
with guard:
NUM_CLASSES=7
NUM_CLASSES = 7
NUM_MAX_BOXES = 50
model = YOLOv3(num_classes=NUM_CLASSES)
# XXX transfer learning
if FLAGS.pretrain_weights is not None:
@@ -530,12 +518,18 @@ def main():
model.load(FLAGS.weights)
optim = make_optimizer(parameter_list=model.parameters())
anno_path = os.path.join(FLAGS.data, 'annotations', 'instances_val2017.json')
inputs = [Input([None, 3, None, None], 'float32', name='image'),
Input([None, 3], 'int32', name='img_info')]
labels = [Input([None, NUM_MAX_BOXES, 4], 'float32', name='gt_bbox'),
Input([None, NUM_MAX_BOXES], 'int32', name='gt_label'),
Input([None, NUM_MAX_BOXES], 'float32', name='gt_score')]
model.prepare(optim,
YoloLoss(num_classes=NUM_CLASSES),
# For YOLOv3, output variable in train/eval is different,
# which is not supported by metric, add by callback later?
# metrics=COCOMetric(anno_path, with_background=False)
)
inputs=inputs,
                      labels=labels)
for e in range(epoch):
logger.info("======== train epoch {} ========".format(e))
......