Commit 180233f2 authored by Q qingqing01

Remove device and device_ids in train/eval/test

Parent 03b7e92f
@@ -137,8 +137,6 @@ def main():
         paddle.batch(paddle.dataset.mnist.test(),
                      batch_size=FLAGS.batch_size, drop_last=True), 1, 1)
-    device_ids = list(range(FLAGS.num_devices))
     with guard:
         model = MNIST()
         optim = Momentum(
@@ -159,8 +157,7 @@ def main():
             val_acc = 0.0
             print("======== train epoch {} ========".format(e))
             for idx, batch in enumerate(train_loader()):
-                outputs, losses = model.train(
-                    batch[0], batch[1], device='gpu', device_ids=device_ids)
+                outputs, losses = model.train(batch[0], batch[1])
                 acc = accuracy(outputs[0], batch[1])[0]
                 train_loss += np.sum(losses)
@@ -171,8 +168,7 @@ def main():
             print("======== eval epoch {} ========".format(e))
             for idx, batch in enumerate(val_loader()):
-                outputs, losses = model.eval(
-                    batch[0], batch[1], device='gpu', device_ids=device_ids)
+                outputs, losses = model.eval(batch[0], batch[1])
                 acc = accuracy(outputs[0], batch[1])[0]
                 val_loss += np.sum(losses)
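After this change the example configures device placement once, when the model is prepared, instead of on every train/eval call. A minimal sketch of the resulting usage; MNIST, Momentum, FLAGS and train_loader come from the example above, while the CrossEntropy loss, the FLAGS.lr field and the exact prepare() arguments are illustrative and not taken from this diff:

# Sketch only: device/device_ids move from per-step calls into prepare().
# Names marked above as illustrative are assumptions, not part of the commit.
model = MNIST()
optim = Momentum(learning_rate=FLAGS.lr, momentum=.9,
                 parameter_list=model.parameters())
model.prepare(optim, CrossEntropy(),
              device='GPU',                               # or 'CPU'
              device_ids=list(range(FLAGS.num_devices)))  # None -> from env vars

for idx, batch in enumerate(train_loader()):
    outputs, losses = model.train(batch[0], batch[1])     # no device arguments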
@@ -110,8 +110,6 @@ class StaticGraphAdapter(object):
         self._progs = {}
         self._compiled_progs = {}
-        self._lazy_load_optimizer = None
 
     @property
     def mode(self):
         return self.model.mode
@@ -120,19 +118,19 @@ class StaticGraphAdapter(object):
     def mode(self, value):
         self.model.mode = value
 
-    def train(self, inputs, labels=None, device='CPU', device_ids=None):
+    def train(self, inputs, labels=None):
         assert self.model._optimizer, \
             "model not ready, please call `model.prepare()` first"
         self.mode = 'train'
-        return self._run(inputs, labels, device, device_ids)
+        return self._run(inputs, labels)
 
-    def eval(self, inputs, labels=None, device='CPU', device_ids=None):
+    def eval(self, inputs, labels=None):
         self.mode = 'eval'
-        return self._run(inputs, labels, device, device_ids)
+        return self._run(inputs, labels)
 
-    def test(self, inputs, device='CPU', device_ids=None):
+    def test(self, inputs):
         self.mode = 'test'
-        return self._run(inputs, None, device, device_ids)
+        return self._run(inputs, None)
 
     def parameters(self, *args, **kwargs):
         return None
@@ -203,10 +201,8 @@ class StaticGraphAdapter(object):
         assert '__static_graph_only__' in optim_state, \
             "optimizer saved in dygraph mode is not usable in static graph"
-        if self._executor is not None:
-            self._load_optimizer(optim_state)
-        else:
-            self._lazy_load_optimizer = optim_state
+        assert self._executor
+        self._load_optimizer(optim_state)
 
     def _load_optimizer(self, state):
         prog = self._progs.get('train', None)
@@ -237,25 +233,14 @@ class StaticGraphAdapter(object):
             t.set(ndarray, place)
 
-    def _run(self, inputs, labels=None, device='CPU', device_ids=None):
-        if self._progs.get(self.mode, None) is None:
-            if isinstance(self.model._inputs, dict):
-                ins = [self.model._inputs[n] \
-                    for n in extract_args(self.model.forward) if n != 'self']
-            else:
-                ins = self.model._inputs
-            self._input_vars[self.mode] = [k.forward() for k in to_list(ins)]
-            self._make_program(self._input_vars[self.mode])
-        compiled_prog = self._compile_and_initialize(self._progs[self.mode],
-                                                     device, device_ids)
+    def _run(self, inputs, labels=None, device='CPU'):
+        compiled_prog = self.prepare()
 
         inputs = to_list(inputs)
         if labels is not None:
             labels = to_list(labels)
-        assert len(inputs) == len(self._input_vars[self.mode]), "number of inputs" \
+        assert len(inputs) == len(self._input_vars[self.mode]), \
+            "number of inputs" \
             + " does not match number of arguments of `forward` method"
 
         feed = {}
@@ -281,6 +266,13 @@ class StaticGraphAdapter(object):
         else:
             return out[:num_output], out[num_output:]
 
+    def _get_loss(self, outputs):
+        assert self.model._loss_function
+        label_vars = [k.forward() for k in to_list(self.model._labels)]
+        self._label_vars[self.mode] = label_vars
+        losses = self.model._loss_function(outputs, label_vars)
+        return losses
+
     def _make_program(self, inputs):
         prog = self._orig_prog.clone()
         if self.mode == 'train' and self.model._optimizer._learning_rate_map:
@@ -292,7 +284,7 @@ class StaticGraphAdapter(object):
         outputs = to_list(self.model.forward(*inputs))
         if self.mode != 'test':
             losses = self._get_loss(outputs)
-            if self.mode == 'train':
+            if self.mode == 'train' and self.model._optimizer:
                 self._loss_endpoint = fluid.layers.sum(losses)
                 self.model._optimizer.minimize(self._loss_endpoint)
         if self.mode != 'train':  # clone again to put it in test mode
@@ -305,22 +297,32 @@ class StaticGraphAdapter(object):
             'output': outputs
         }
 
-    def _get_loss(self, outputs):
-        assert self.model._loss_function
-        label_vars = [k.forward() for k in to_list(self.model._labels)]
-        self._label_vars[self.mode] = label_vars
-        losses = self.model._loss_function(outputs, label_vars)
-        return losses
+    def prepare(self):
+        compiled_prog = self._compiled_progs.get(self.mode, None)
+        if compiled_prog is not None:
+            return compiled_prog
+
+        if isinstance(self.model._inputs, dict):
+            ins = [self.model._inputs[n] \
+                for n in extract_args(self.model.forward) if n != 'self']
+        else:
+            ins = self.model._inputs
+        self._input_vars[self.mode] = [k.forward() for k in to_list(ins)]
+        self._make_program(self._input_vars[self.mode])
+        return self._compile_and_initialize(self._progs[self.mode])
 
-    def _compile_and_initialize(self, prog, device='CPU', device_ids=None):
+    def _compile_and_initialize(self, prog):
         compiled_prog = self._compiled_progs.get(self.mode, None)
         if compiled_prog is not None:
             return compiled_prog
 
-        places = [
-            device.lower() == 'gpu' and fluid.CUDAPlace(i) or fluid.CPUPlace()
-            for i in device_ids
-        ]
+        device = self.model._device
+        device_ids = self.model._device_ids
+
+        if device.lower() == 'gpu':
+            places = fluid.cuda_places(device_ids)
+        else:
+            places = fluid.cpu_places(len(device_ids) if device_ids else None)
 
         # XXX *ALL WEIGHTS* should be initialized upon model construction
         # even if `forward()` may run different code path for different mode
@@ -338,12 +340,8 @@ class StaticGraphAdapter(object):
             startup_prog = self._startup_prog._prune(uninitialized)
             self._executor.run(startup_prog)
 
-        if self.mode == 'train' and self._lazy_load_optimizer:
-            self._load_optimizer(self._lazy_load_optimizer)
-            self._lazy_load_optimizer = None
-
         compiled_prog = fluid.CompiledProgram(prog)
-        if len(device_ids) > 1:
+        if len(places) > 1:
             loss_name = None
             if self.mode == 'train' and self._loss_endpoint is not None:
                 loss_name = self._loss_endpoint.name
@@ -356,12 +354,10 @@ class StaticGraphAdapter(object):
             # program will be run before the train program
             if self.mode == 'train' and 'eval' in self._compiled_progs:
                 del self._compiled_progs['eval']
             compiled_prog = compiled_prog.with_data_parallel(
                 loss_name=loss_name,
                 places=places,
                 share_vars_from=share_vars_from)
         self._compiled_progs[self.mode] = compiled_prog
         return compiled_prog
@@ -380,7 +376,7 @@ class DynamicGraphAdapter(object):
         self.model.mode = value
 
     # TODO multi device in dygraph mode not implemented at present time
-    def train(self, inputs, labels=None, device='CPU', device_ids=None):
+    def train(self, inputs, labels=None):
         assert self.model._optimizer, \
             "model not ready, please call `model.prepare()` first"
         super(Model, self.model).train()
@@ -397,7 +393,7 @@ class DynamicGraphAdapter(object):
         return [to_numpy(o) for o in to_list(outputs)], \
             [to_numpy(l) for l in losses]
 
-    def eval(self, inputs, labels=None, device='CPU', device_ids=None):
+    def eval(self, inputs, labels=None):
         super(Model, self.model).eval()
         self.mode = 'eval'
         inputs = to_list(inputs)
@@ -408,7 +404,7 @@ class DynamicGraphAdapter(object):
         return [to_numpy(o) for o in to_list(outputs)], \
             [to_numpy(l) for l in losses]
 
-    def test(self, inputs, device='CPU', device_ids=None):
+    def test(self, inputs):
         super(Model, self.model).eval()
         self.mode = 'test'
         inputs = [to_variable(x) for x in to_list(inputs)]
@@ -453,6 +449,9 @@ class Model(fluid.dygraph.Layer):
         self._loss_weights = None
         self._loss = None
         self._optimizer = None
+        self._device = None
+        self._device_ids = None
+        self._optimizer = None
         if in_dygraph_mode():
             self._adapter = DynamicGraphAdapter(self)
         else:
@@ -473,18 +472,45 @@ class Model(fluid.dygraph.Layer):
     def load(self, *args, **kwargs):
         return self._adapter.load(*args, **kwargs)
 
-    def prepare(self, optimizer, loss_function=None, inputs=None, labels=None):
+    def prepare(self,
+                optimizer=None,
+                loss_function=None,
+                inputs=None,
+                labels=None,
+                device=None,
+                device_ids=None):
         """
         FIXME: add comments
         Args:
+            optimizer (Optimizer|None): the optimizer must be set for training
+                and should be an Optimizer instance. It can be None in eval
+                and test mode.
+            loss_function (Loss|None): the loss function must be set for
+                training and should be a Loss instance. It can be None when
+                there is no loss.
-            inputs (Input|list|dict|None): inputs, entry points of network,
-                could be a Input layer, or lits of Input layers, or dict (name: ), or None.
-                For static graph, inputs must be set. For dynamic graph, it could
-                be None.
+            inputs (Input|list|dict|None): inputs, the entry points of the
+                network, could be an Input layer, a list of Input layers,
+                a dict (name: Input), or None. For static graph,
+                inputs must be set. For dynamic graph, it could be None.
             labels (Input|list|dict|None): labels, entry points of network,
                 could be a Input layer or lits of Input layers, or None.
                 For static graph, if set loss_function in Model.prepare(), it
                 must be set. Otherwise, it could be None.
+            device (str|None): specify the device type, 'CPU' or 'GPU'.
+                If None, the device is selected automatically according to
+                the installed package version (GPU if compiled with CUDA,
+                otherwise CPU).
+            device_ids (list[int]|None): specify the device indices. If None,
+                the available devices are obtained from environment variables
+                when the model is executed: for GPU, the visible device ids
+                are read from FLAGS_selected_gpus or CUDA_VISIBLE_DEVICES;
+                for CPU, the number of usable cores is read from CPU_NUM
+                (e.g. export CPU_NUM=4). If CPU_NUM is not set, the executor
+                sets it to 1. The default is None.
         """
         self._optimizer = optimizer
         if loss_function:
@@ -500,6 +526,12 @@ class Model(fluid.dygraph.Layer):
                 raise TypeError("'labels' must be list in static graph mode")
         self._inputs = inputs
         self._labels = labels
+        self._device = device
+        if device is None:
+            self._device = 'GPU' if fluid.is_compiled_with_cuda() else 'CPU'
+        self._device_ids = device_ids
+        if not in_dygraph_mode():
+            self._adapter.prepare()
 
     def parameters(self, *args, **kwargs):
         return self._adapter.parameters(*args, **kwargs)
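For reference, the place selection that _compile_and_initialize now derives from model._device and model._device_ids can be tried in isolation. A small standalone sketch using the same fluid helpers; the select_places function is hypothetical and only mirrors the branch added above:

import paddle.fluid as fluid

def select_places(device, device_ids=None):
    # Mirrors the logic added in _compile_and_initialize: for 'GPU' the ids
    # become CUDAPlace objects via fluid.cuda_places(); otherwise CPU places
    # are created, their count taken from device_ids if given, else from the
    # CPU_NUM environment variable.
    if device.lower() == 'gpu':
        return fluid.cuda_places(device_ids)
    return fluid.cpu_places(len(device_ids) if device_ids else None)

# e.g. [CUDAPlace(0), CUDAPlace(1)] on a machine with two visible GPUs
places = select_places('GPU', [0, 1])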