未验证 提交 7c92177c 编写于 作者: Z zhaoyingli 提交者: GitHub

[AutoParallel] add callbacks (#47014)

* [AutoParallel] add callbacks

* fix unittest

* fix dist_context

* fix engine

* fix cmakelist

* fix unittest's returns

* fix cmakelist
上级 b9a2f29c
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import time
import paddle
from paddle.hapi.callbacks import ProgBarLogger, ModelCheckpoint, LRScheduler, CallbackList, Callback
from .interface import CollectionNames, get_collection
def config_callbacks(callbacks=None,
                     engine=None,
                     batch_size=None,
                     epochs=None,
                     steps=None,
                     log_freq=2,
                     verbose=2,
                     save_freq=1,
                     save_dir=None,
                     metrics=None,
                     acc_step=1,
                     mode='train'):
    """Build and configure the ``CallbackList`` used by the auto-parallel Engine.

    User-supplied callbacks are kept; missing standard callbacks (progress
    logger, LR scheduler, checkpointer, history and, at ``verbose == 3``, a
    profiler) are added, and plain hapi callbacks are swapped for their
    auto-parallel counterparts.

    Args:
        callbacks: a single Callback, a list/tuple of Callbacks, or None.
        engine: the auto-parallel Engine the callbacks are bound to.
        batch_size, epochs, steps, verbose, metrics, acc_step: forwarded to
            every callback through ``set_params``.
        log_freq: step interval of the progress logger.
        save_freq, save_dir: checkpoint frequency (epochs) and directory.
        mode: 'train'/'eval'/'test'; metrics are dropped when 'test'.

    Returns:
        CallbackList: the configured callback container.
    """
    cbks = callbacks or []
    if not isinstance(cbks, (list, tuple)):
        cbks = [cbks]

    # Prepend logger/LR-scheduler; append checkpointer/profiler/history
    # when the user did not supply one of the corresponding kinds.
    if verbose and not any(isinstance(c, ProgBarLogger) for c in cbks):
        cbks = [ProgBarLoggerAuto(log_freq, verbose=verbose)] + cbks
    if not any(isinstance(c, LRScheduler) for c in cbks):
        cbks = [LRSchedulerAuto()] + cbks
    if not any(isinstance(c, ModelCheckpoint) for c in cbks):
        cbks = cbks + [ModelCheckpointAuto(save_freq, save_dir)]
    if verbose == 3 and not any(isinstance(c, Profiler) for c in cbks):
        cbks = cbks + [Profiler(timer_only=True)]
    if not any(isinstance(c, History) for c in cbks):
        cbks = cbks + [History()]

    # Replace plain hapi callbacks with their auto-parallel versions,
    # preserving their user-visible configuration.
    for idx, cbk in enumerate(cbks):
        if isinstance(cbk, ProgBarLogger):
            cbks[idx] = ProgBarLoggerAuto(cbk.log_freq, cbk.verbose)
        if isinstance(cbk, LRScheduler):
            cbks[idx] = LRSchedulerAuto(cbk.by_step, cbk.by_epoch)
        if isinstance(cbk, ModelCheckpoint):
            cbks[idx] = ModelCheckpointAuto(cbk.save_freq, cbk.save_dir)

    cbk_list = CallbackList(cbks)
    cbk_list.set_model(engine)

    if mode == 'test':
        metrics = []
    else:
        metrics = metrics or []
    cbk_list.set_params({
        'batch_size': batch_size,
        'epochs': epochs,
        'steps': steps,
        'verbose': verbose,
        'metrics': metrics,
        'acc_step': acc_step,
    })
    return cbk_list
class ProgBarLoggerAuto(ProgBarLogger):
    """Progress-bar logger used by the auto-parallel Engine.

    Compared to the hapi ``ProgBarLogger`` it always prints, and it
    additionally reports values registered in the LOGGING collection,
    user fetch outputs and, at ``verbose == 3``, reader/batch timing
    statistics.
    """

    def __init__(self, log_freq=1, verbose=2):
        super(ProgBarLoggerAuto, self).__init__(log_freq, verbose)

    def _is_print(self):
        # Unlike the base class, printing here is unconditional.
        return True

    def _updates(self, logs, mode):
        metrics = getattr(self, mode + '_metrics')
        progbar = getattr(self, mode + '_progbar')
        steps = getattr(self, mode + '_step')

        # Known metric values first, then the current learning rate.
        values = [(name, logs[name]) for name in metrics if name in logs]
        if 'lr' in logs:
            values.append(('lr', logs['lr']))

        # Values the user registered for logging via the LOGGING collection.
        fetches_logs = logs.get('fetches', {})
        for name, var in get_collection(CollectionNames.LOGGING):
            key = name or var.name
            if key in fetches_logs:
                values.append((key, fetches_logs[key]))

        # Raw user fetch outputs.
        out_logs = logs.get('outputs', {})
        for key in out_logs:
            values.append((key, out_logs[key]))

        timer_name = '_%s_timer' % mode
        if self.verbose == 3 and hasattr(self, timer_name):
            timer = getattr(self, timer_name)
            # Fall back to 1.0 so the averages never divide by zero.
            cnt = timer['count'] or 1.0
            samples = timer['samples'] or 1.0
            values.append(
                ('avg_reader_cost', "%.5f sec" % (timer['data_time'] / cnt)))
            values.append(
                ('avg_batch_cost', "%.5f sec" % (timer['batch_time'] / cnt)))
            values.append(
                ('ips', "%.5f samples/sec" %
                 (samples / (timer['data_time'] + timer['batch_time']))))
            # Reset accumulators so each report covers a fresh window.
            timer['count'] = 0
            timer['samples'] = 0
            timer['data_time'] = 0.
            timer['batch_time'] = 0.

        progbar.update(steps, values)

    def on_eval_batch_end(self, step, logs=None):
        logs = logs or {}
        self.eval_step += 1
        self.evaled_samples += self.params['batch_size']

        timer = self._eval_timer
        timer['batch_time'] += time.time() - timer['batch_data_end_time']
        timer['count'] += 1
        timer['samples'] += self.params['batch_size']

        # Only refresh the bar every `log_freq` steps, and never past the
        # final step (the epoch-end hook handles that one).
        if self._is_print() and self.eval_step % self.log_freq == 0:
            if self.eval_steps is None or self.eval_step < self.eval_steps:
                self._updates(logs, 'eval')
        timer['batch_start_time'] = time.time()
class LRSchedulerAuto(LRScheduler):
    """LR-scheduler callback aware of gradient accumulation.

    The optimizer's ``LRScheduler`` is stepped only once per ``acc_step``
    micro-batches, so the schedule advances per effective batch.
    """

    def __init__(self, by_step=True, by_epoch=False):
        super(LRSchedulerAuto, self).__init__(by_step, by_epoch)

    def on_epoch_begin(self, epoch=None, logs=None):
        # 'acc_step' is injected by config_callbacks through set_params.
        self.acc_step = self.params["acc_step"]
        self.epoch = epoch
        self.train_step = 0

    def on_train_batch_end(self, step, logs=None):
        self.train_step += 1
        if not self.by_step or self.train_step % self.acc_step != 0:
            return
        optimizer = self.model._optimizer
        lr = getattr(optimizer, '_learning_rate', None) if optimizer else None
        if isinstance(lr, paddle.optimizer.lr.LRScheduler):
            lr.step()
class History(Callback):
    """Records per-epoch logs into ``self.history`` (metric name -> list).

    The instance is exposed on the engine as ``engine.history`` so results
    can be inspected after ``fit`` returns.
    """

    def __init__(self):
        # Fix: initialize base-class state (model/params) before our own;
        # the original skipped super().__init__(), leaving those attributes
        # unset until set_model/set_params happened to be called.
        super(History, self).__init__()
        self.history = {}

    def on_train_begin(self, logs=None):
        # List of epoch indices seen so far in this run.
        self.epoch = []

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        self.epoch.append(epoch)
        for k, v in logs.items():
            self.history.setdefault(k, []).append(v)
        # Re-attach every epoch so the engine always sees the latest data.
        self.model.history = self
class Profiler(Callback):
    """Callback wrapping ``paddle.profiler.Profiler`` around a training run.

    The profiler is started at train begin, stepped after every batch (so
    throughput is reported in samples), and stopped/summarized at train end.
    All constructor arguments are forwarded to ``paddle.profiler.Profiler``.
    """

    def __init__(self, *args, **kwargs):
        # Fix: initialize base-class state (model/params) — the original
        # skipped super().__init__() and relied on set_model/set_params
        # being called before any use of those attributes.
        super(Profiler, self).__init__()
        self.prof = paddle.profiler.Profiler(*args, **kwargs)

    def on_epoch_begin(self, epoch=None, logs=None):
        self.epoch = epoch
        self.train_step = 0
        # Injected by config_callbacks through set_params.
        self.batch_size = self.params["batch_size"]
        self.steps = self.params['steps']

    def on_train_begin(self, logs=None):
        self.prof.start()

    def on_train_batch_end(self, step, logs=None):
        self.train_step += 1
        self.prof.step(num_samples=self.batch_size)
        print("step {}:{}".format(self.train_step,
                                  self.prof.step_info(unit='samples')))

    def on_train_end(self, logs=None):
        self.prof.stop()
        self.prof.summary()
class ModelCheckpointAuto(ModelCheckpoint):
    """Checkpoint callback for the auto-parallel Engine.

    Saves through ``engine.save`` every ``save_freq`` epochs and once more
    at the end of training (under 'final'). Saving is skipped when no
    engine is bound or no ``save_dir`` is configured.
    """

    def __init__(self, *args, **kwargs):
        super(ModelCheckpointAuto, self).__init__(*args, **kwargs)

    def _is_save(self):
        # Both a bound engine and a target directory are required.
        return self.model and self.save_dir

    def on_epoch_end(self, epoch, logs=None):
        # Fix: use the `epoch` argument consistently. The original tested
        # `self.epoch` (set only by the base on_epoch_begin) but built the
        # path from `epoch`, which breaks if on_epoch_begin never ran.
        if self._is_save() and (epoch + 1) % self.save_freq == 0:
            path = '{}/epoch{}'.format(self.save_dir, epoch)
            print('save checkpoint at {}'.format(os.path.abspath(path)))
            self.model.save(path)

    def on_train_end(self, logs=None):
        if self._is_save():
            path = '{}/final'.format(self.save_dir)
            print('save checkpoint at {}'.format(os.path.abspath(path)))
            self.model.save(path)
...@@ -74,7 +74,6 @@ class DistributedContext: ...@@ -74,7 +74,6 @@ class DistributedContext:
self._serial_optimizer = None self._serial_optimizer = None
self._serial_feed_vars = {} self._serial_feed_vars = {}
self._serial_fetch_vars = {} self._serial_fetch_vars = {}
self._lr_optimizer = None # record the optimzier holding lr_scheduler
# Data members related to the program # Data members related to the program
self._dist_tensors_for_program = {} self._dist_tensors_for_program = {}
...@@ -870,7 +869,7 @@ class DistributedContext: ...@@ -870,7 +869,7 @@ class DistributedContext:
"_serial_ordered_nodes", "_serial_ordered_tensor_nodes", \ "_serial_ordered_nodes", "_serial_ordered_tensor_nodes", \
"_serial_ordered_op_nodes", "_original_serial_loss", \ "_serial_ordered_op_nodes", "_original_serial_loss", \
"_original_serial_feed_vars", "_original_serial_fetch_vars", \ "_original_serial_feed_vars", "_original_serial_fetch_vars", \
"_serial_loss", "_serial_feed_vars", "_serial_fetch_vars", "_lr_optimizer", \ "_serial_loss", "_serial_feed_vars", "_serial_fetch_vars", "_serial_optimizer", \
"_backup_serial_main_program_stack", "_backup_serial_startup_program_stack", \ "_backup_serial_main_program_stack", "_backup_serial_startup_program_stack", \
"_pass_context"]: "_pass_context"]:
setattr(result, k, v) setattr(result, k, v)
......
...@@ -214,8 +214,12 @@ def add_to_collection(collection_name, value, name=None): ...@@ -214,8 +214,12 @@ def add_to_collection(collection_name, value, name=None):
if collection_name not in _g_collections: if collection_name not in _g_collections:
_g_collections[collection_name] = [] _g_collections[collection_name] = []
if name is not None: if name is not None:
for _, v in _g_collections[collection_name]:
if v == value: return
_g_collections[collection_name].append((name, value)) _g_collections[collection_name].append((name, value))
else: else:
for _, v in _g_collections[collection_name]:
if v == value: return
_g_collections[collection_name].append((None, value)) _g_collections[collection_name].append((None, value))
......
...@@ -23,10 +23,10 @@ import logging ...@@ -23,10 +23,10 @@ import logging
import pickle import pickle
import time import time
import paddle import paddle
from paddle.fluid.backward import append_backward
from paddle.distributed.utils.log_utils import get_logger
import paddle.fluid.core as core import paddle.fluid.core as core
from paddle.fluid import program_guard from paddle.fluid import program_guard
from paddle.fluid.backward import append_backward
from paddle.distributed.utils.log_utils import get_logger
from paddle.distributed.passes import new_pass, PassContext from paddle.distributed.passes import new_pass, PassContext
from .dist_context import DistributedContext from .dist_context import DistributedContext
from .dist_context import set_default_distributed_context from .dist_context import set_default_distributed_context
...@@ -39,7 +39,6 @@ from .process_group import _g_process_group_map, ProcessGroup ...@@ -39,7 +39,6 @@ from .process_group import _g_process_group_map, ProcessGroup
from .utils import make_data_unshard from .utils import make_data_unshard
from .utils import set_grad_var_shape from .utils import set_grad_var_shape
from .utils import SerialProgramInfo from .utils import SerialProgramInfo
from .utils import get_logger
from .reshard import Resharder from .reshard import Resharder
from .cluster import Cluster from .cluster import Cluster
from .mapper import mapping from .mapper import mapping
...@@ -147,7 +146,7 @@ class AutoParallelizer: ...@@ -147,7 +146,7 @@ class AutoParallelizer:
with program_guard(main_program, startup_program): with program_guard(main_program, startup_program):
optimize_ops = optimizer.apply_gradients(params_grads) optimize_ops = optimizer.apply_gradients(params_grads)
self._dist_context._lr_optimizer = optimizer self._dist_context._serial_optimizer = optimizer
# update completion # update completion
self._completer = Completer(self._dist_context) self._completer = Completer(self._dist_context)
self._completer.complete_update_annotation(main_program) self._completer.complete_update_annotation(main_program)
......
...@@ -24,8 +24,8 @@ from paddle.distributed.passes import new_pass ...@@ -24,8 +24,8 @@ from paddle.distributed.passes import new_pass
from .reshard import Resharder from .reshard import Resharder
from .partitioner import Partitioner from .partitioner import Partitioner
from .utils import set_grad_var_shape from .utils import set_grad_var_shape
from .utils import get_logger
from .process_group import get_world_process_group from .process_group import get_world_process_group
from ..utils.log_utils import get_logger
class Parallelizer: class Parallelizer:
...@@ -62,7 +62,7 @@ class Parallelizer: ...@@ -62,7 +62,7 @@ class Parallelizer:
serial_main_program, serial_startup_program, params_grads = self._apply_pre_optimization( serial_main_program, serial_startup_program, params_grads = self._apply_pre_optimization(
serial_main_program, serial_startup_program, serial_loss, serial_main_program, serial_startup_program, serial_loss,
serial_optimizer, params_grads) serial_optimizer, params_grads)
self._logger.info( self._logger.debug(
"within parallel apply_pre_optimization time: {}, mode {}". "within parallel apply_pre_optimization time: {}, mode {}".
format(time.time() - time0, self._mode)) format(time.time() - time0, self._mode))
# Do logical partition # Do logical partition
...@@ -70,14 +70,14 @@ class Parallelizer: ...@@ -70,14 +70,14 @@ class Parallelizer:
partitioner = Partitioner(self._dist_context, rank) partitioner = Partitioner(self._dist_context, rank)
dist_main_prog, dist_startup_prog, dist_params_grads = partitioner.partition( dist_main_prog, dist_startup_prog, dist_params_grads = partitioner.partition(
serial_main_program, serial_startup_program, params_grads) serial_main_program, serial_startup_program, params_grads)
self._logger.info( self._logger.debug(
"within parallel partitioner time: {}, mode {}".format( "within parallel partitioner time: {}, mode {}".format(
time.time() - time0, self._mode)) time.time() - time0, self._mode))
# Generate optimizer # Generate optimizer
time0 = time.time() time0 = time.time()
self._generate_optimizer(dist_main_prog, dist_startup_prog, self._generate_optimizer(dist_main_prog, dist_startup_prog,
serial_optimizer, dist_params_grads) serial_optimizer, dist_params_grads)
self._logger.info( self._logger.debug(
"within parallel optimizer time: {}, mode {}".format( "within parallel optimizer time: {}, mode {}".format(
time.time() - time0, self._mode)) time.time() - time0, self._mode))
# Do reshard process # Do reshard process
...@@ -86,14 +86,14 @@ class Parallelizer: ...@@ -86,14 +86,14 @@ class Parallelizer:
resharder = Resharder(dist_main_prog, dist_startup_prog, rank, resharder = Resharder(dist_main_prog, dist_startup_prog, rank,
self._dist_context, dist_params_grads) self._dist_context, dist_params_grads)
resharder.reshard() resharder.reshard()
self._logger.info( self._logger.debug(
"within parallel reshard time: {}, mode {}".format( "within parallel reshard time: {}, mode {}".format(
time.time() - time0, self._mode)) time.time() - time0, self._mode))
# Apply post optimization passes # Apply post optimization passes
time0 = time.time() time0 = time.time()
self._apply_post_optimization(dist_main_prog, dist_startup_prog, self._apply_post_optimization(dist_main_prog, dist_startup_prog,
rank, dist_params_grads) rank, dist_params_grads)
self._logger.info( self._logger.debug(
"within parallel apply_post_optimization time: {}, mode {}". "within parallel apply_post_optimization time: {}, mode {}".
format(time.time() - time0, self._mode)) format(time.time() - time0, self._mode))
else: else:
...@@ -102,7 +102,7 @@ class Parallelizer: ...@@ -102,7 +102,7 @@ class Parallelizer:
self._apply_pre_optimization(serial_main_program, self._apply_pre_optimization(serial_main_program,
serial_startup_program, None, None, serial_startup_program, None, None,
None) None)
self._logger.info( self._logger.debug(
"within parallel apply_pre_optimization time: {}, mode {}". "within parallel apply_pre_optimization time: {}, mode {}".
format(time.time() - time0, self._mode)) format(time.time() - time0, self._mode))
# Do logical partition # Do logical partition
...@@ -111,14 +111,14 @@ class Parallelizer: ...@@ -111,14 +111,14 @@ class Parallelizer:
dist_main_prog, dist_startup_prog, dist_params_grads = partitioner.partition( dist_main_prog, dist_startup_prog, dist_params_grads = partitioner.partition(
serial_main_program, serial_startup_program, []) serial_main_program, serial_startup_program, [])
# Do reshard process # Do reshard process
self._logger.info( self._logger.debug(
"within parallel partitioner time: {}, mode {}".format( "within parallel partitioner time: {}, mode {}".format(
time.time() - time0, self._mode)) time.time() - time0, self._mode))
time0 = time.time() time0 = time.time()
resharder = Resharder(dist_main_prog, dist_startup_prog, rank, resharder = Resharder(dist_main_prog, dist_startup_prog, rank,
self._dist_context, [], 1) self._dist_context, [], 1)
resharder.reshard() resharder.reshard()
self._logger.info( self._logger.debug(
"within parallel reshard time: {}, mode {}".format( "within parallel reshard time: {}, mode {}".format(
time.time() - time0, self._mode)) time.time() - time0, self._mode))
# Clone program for test # Clone program for test
...@@ -143,7 +143,7 @@ class Parallelizer: ...@@ -143,7 +143,7 @@ class Parallelizer:
# NOTE: `apply_gradients` will add an Accumulator for a parameter only once, # NOTE: `apply_gradients` will add an Accumulator for a parameter only once,
# but optimizer will be called repeatedly in re-launch, so optimizer need to be copied. # but optimizer will be called repeatedly in re-launch, so optimizer need to be copied.
optimizer = copy.deepcopy(optimizer) optimizer = copy.deepcopy(optimizer)
self._dist_context._lr_optimizer = optimizer self._dist_context._serial_optimizer = optimizer
with program_guard(main_program, startup_program): with program_guard(main_program, startup_program):
with unique_name.guard("opt_"): with unique_name.guard("opt_"):
optimizer_ops = optimizer.apply_gradients(params_grads) optimizer_ops = optimizer.apply_gradients(params_grads)
...@@ -170,9 +170,7 @@ class Parallelizer: ...@@ -170,9 +170,7 @@ class Parallelizer:
startup_program = self._pass_context.get_attr("startup_program") startup_program = self._pass_context.get_attr("startup_program")
params_grads = self._pass_context.get_attr("params_grads") params_grads = self._pass_context.get_attr("params_grads")
# apply amp pass # apply amp pass on train/eval/predict
# FIXME we disenable amp for eval since it has a little bug with
# eval program and which will be fixed in future
if self._strategy.amp.enable: if self._strategy.amp.enable:
config = copy.deepcopy(self._strategy.amp.to_dict()) config = copy.deepcopy(self._strategy.amp.to_dict())
config["dist_context"] = self._dist_context config["dist_context"] = self._dist_context
......
...@@ -1587,3 +1587,18 @@ def find_higher_order_backward_op(program): ...@@ -1587,3 +1587,18 @@ def find_higher_order_backward_op(program):
return True return True
return False return False
def get_lr(optimizer):
if isinstance(optimizer, paddle.optimizer.Optimizer):
return optimizer.get_lr()
elif isinstance(optimizer, paddle.fluid.optimizer.Optimizer):
if isinstance(optimizer._learning_rate, float):
return optimizer._learning_rate
else:
return optimizer._learning_rate()
else:
raise TypeError(
"'optimizer' must be object of class `paddle.optimizer.Optimizer`" \
" or `paddle.fluid.optimizer.Optimizer`, but got {}.".format(type(optimizer))
)
...@@ -212,7 +212,7 @@ class ClipGradByGloblNormPass(PassBase): ...@@ -212,7 +212,7 @@ class ClipGradByGloblNormPass(PassBase):
if self.get_attr("dist_context") is None: if self.get_attr("dist_context") is None:
return False return False
dist_context = self.get_attr("dist_context") dist_context = self.get_attr("dist_context")
if dist_context._lr_optimizer._grad_clip is None: if dist_context._serial_optimizer._grad_clip is None:
return False return False
if self.get_attr("params_grads") is None: if self.get_attr("params_grads") is None:
return False return False
......
...@@ -60,6 +60,9 @@ if(WITH_DISTRIBUTE AND WITH_GPU) ...@@ -60,6 +60,9 @@ if(WITH_DISTRIBUTE AND WITH_GPU)
py_test_modules(test_pass_amp MODULES test_pass_amp ENVS ${dist_ENVS}) py_test_modules(test_pass_amp MODULES test_pass_amp ENVS ${dist_ENVS})
set_tests_properties(test_pass_amp PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" set_tests_properties(test_pass_amp PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE"
TIMEOUT 50) TIMEOUT 50)
py_test_modules(test_engine_callbacks MODULES test_engine_callbacks)
set_tests_properties(test_engine_callbacks
PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 50)
py_test_modules(test_while_op_completion MODULES test_while_op_completion py_test_modules(test_while_op_completion MODULES test_while_op_completion
ENVS ${dist_ENVS}) ENVS ${dist_ENVS})
...@@ -100,5 +103,4 @@ if(WITH_DISTRIBUTE AND WITH_GPU) ...@@ -100,5 +103,4 @@ if(WITH_DISTRIBUTE AND WITH_GPU)
py_test_modules(test_dist_assign MODULES test_dist_assign) py_test_modules(test_dist_assign MODULES test_dist_assign)
py_test_modules(test_conditional_block_reshard MODULES py_test_modules(test_conditional_block_reshard MODULES
test_conditional_block_reshard) test_conditional_block_reshard)
endif() endif()
...@@ -87,27 +87,27 @@ class TestAMPPass(unittest.TestCase): ...@@ -87,27 +87,27 @@ class TestAMPPass(unittest.TestCase):
def test_amp_pass(self): def test_amp_pass(self):
# mp2 training # mp2 training
mp_engine = self.get_engine() mp_engine = self.get_engine()
outs = mp_engine.fit(self.dataset, 3, batch_size=self.batch_size) history = mp_engine.fit(self.dataset, 3, batch_size=self.batch_size)
mp_losses = np.array(outs["loss"]) mp_losses = np.array(history.history["loss"])
# mp2 amp-o1 training # mp2 amp-o1 training
amp_o1_engine = self.get_engine(True, "o1") amp_o1_engine = self.get_engine(True, "o1")
outs = amp_o1_engine.fit(self.dataset, 3, batch_size=self.batch_size) history = amp_o1_engine.fit(self.dataset, 3, batch_size=self.batch_size)
amp_o1_losses = np.array(outs["loss"]) amp_o1_losses = np.array(history.history["loss"])
amp_o1_engine.evaluate(self.dataset, 3, batch_size=self.batch_size) amp_o1_engine.evaluate(self.dataset, 3, batch_size=self.batch_size)
# self.check_results(mp_losses, amp_o1_losses) # self.check_results(mp_losses, amp_o1_losses)
# mp2 amp-o2 training # mp2 amp-o2 training
amp_o2_engine = self.get_engine(True, "o2") amp_o2_engine = self.get_engine(True, "o2")
outs = amp_o2_engine.fit(self.dataset, 3, batch_size=self.batch_size) history = amp_o2_engine.fit(self.dataset, 3, batch_size=self.batch_size)
amp_o2_losses = np.array(outs["loss"]) amp_o2_losses = np.array(history.history["loss"])
amp_o2_engine.evaluate(self.dataset, 3, batch_size=self.batch_size) amp_o2_engine.evaluate(self.dataset, 3, batch_size=self.batch_size)
# self.check_results(mp_losses, amp_o2_losses) # self.check_results(mp_losses, amp_o2_losses)
# mp2 amp-o3 training # mp2 amp-o3 training
amp_o3_engine = self.get_engine(True, "o3") amp_o3_engine = self.get_engine(True, "o3")
outs = amp_o3_engine.fit(self.dataset, 3, batch_size=self.batch_size) history = amp_o3_engine.fit(self.dataset, 3, batch_size=self.batch_size)
amp_o3_losses = np.array(outs["loss"]) amp_o3_losses = np.array(history.history["loss"])
amp_o3_engine.evaluate(self.dataset, 3, batch_size=self.batch_size) amp_o3_engine.evaluate(self.dataset, 3, batch_size=self.batch_size)
# self.check_results(mp_losses, amp_o3_losses) # self.check_results(mp_losses, amp_o3_losses)
......
...@@ -25,6 +25,7 @@ from paddle.io import Dataset ...@@ -25,6 +25,7 @@ from paddle.io import Dataset
from paddle.distributed.fleet import auto from paddle.distributed.fleet import auto
paddle.enable_static() paddle.enable_static()
global_process_mesh = auto.ProcessMesh(mesh=[0, 1]) global_process_mesh = auto.ProcessMesh(mesh=[0, 1])
PP_MESH_0 = auto.ProcessMesh([0]) PP_MESH_0 = auto.ProcessMesh([0])
PP_MESH_1 = auto.ProcessMesh([1]) PP_MESH_1 = auto.ProcessMesh([1])
...@@ -113,7 +114,7 @@ class MLPLayer(nn.Layer): ...@@ -113,7 +114,7 @@ class MLPLayer(nn.Layer):
if is_feed: if is_feed:
my_feed_vars.append((out, out.shape)) my_feed_vars.append((out, out.shape))
if is_fetch: if is_fetch:
auto.fetch(out, "my_out", logging=True) auto.fetch(out, "my_fetch", logging=True)
return out return out
...@@ -140,10 +141,11 @@ def train_high_level(fetch): ...@@ -140,10 +141,11 @@ def train_high_level(fetch):
# train # train
train_dataset = MyDataset(batch_num * batch_size) train_dataset = MyDataset(batch_num * batch_size)
eval_dataset1 = MyDataset(5 * batch_size) eval_dataset1 = MyDataset(5 * batch_size)
engine.fit(train_data=train_dataset, history = engine.fit(train_data=train_dataset,
epochs=2, epochs=2,
batch_size=batch_size, batch_size=batch_size,
valid_data=eval_dataset1) valid_data=eval_dataset1,
log_freq=1)
# eval # eval
eval_dataset2 = MyDataset(batch_size) eval_dataset2 = MyDataset(batch_size)
...@@ -151,7 +153,7 @@ def train_high_level(fetch): ...@@ -151,7 +153,7 @@ def train_high_level(fetch):
# predict # predict
test_dataset = MyDataset(batch_size) test_dataset = MyDataset(batch_size)
engine.predict(test_dataset, batch_size=batch_size) outputs = engine.predict(test_dataset, batch_size=batch_size)
# save # save
temp_dir = tempfile.TemporaryDirectory() temp_dir = tempfile.TemporaryDirectory()
......
...@@ -83,25 +83,32 @@ class TestGradientMergePass(unittest.TestCase): ...@@ -83,25 +83,32 @@ class TestGradientMergePass(unittest.TestCase):
def test_gradient_merge_pass(self): def test_gradient_merge_pass(self):
# dp2 training # dp2 training
dp_engine = self.get_engine() dp_engine = self.get_engine()
outs = dp_engine.fit(self.dataset, 3, batch_size=self.batch_size) history = dp_engine.fit(self.dataset,
dp_losses = np.array(outs["loss"]) 3,
batch_size=self.batch_size,
log_freq=1)
dp_losses = np.array(history.history["loss"])
# dp2 gradient merge training # dp2 gradient merge training
gm_engine = self.get_engine(True) gm_engine = self.get_engine(True)
outs = gm_engine.fit(self.dataset, 3, batch_size=self.batch_size) history = gm_engine.fit(self.dataset,
gm_losses = np.array(outs["loss"]) 3,
batch_size=self.batch_size,
avg_loss = 0 log_freq=1)
pass_avg_ret_list = [] gm_losses = np.array(history.history["loss"])
for i, pass_ret in enumerate(gm_losses):
if (i + 1) % 4 == 0: # avg_loss = 0
avg_loss += pass_ret # pass_avg_ret_list = []
pass_avg_ret_list.append(avg_loss / 4) # for i, pass_ret in enumerate(gm_losses):
avg_loss = 0 # if (i + 1) % 4 == 0:
else: # avg_loss += pass_ret
avg_loss += pass_ret # pass_avg_ret_list.append(avg_loss / 4)
# avg_loss = 0
# self.check_results(dp_losses, np.array(pass_avg_ret_list)) # else:
# avg_loss += pass_ret
# NOTE: every sample data from dataset is all the same
self.check_results(dp_losses, gm_losses)
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -78,13 +78,13 @@ class TestRecomputePass(unittest.TestCase): ...@@ -78,13 +78,13 @@ class TestRecomputePass(unittest.TestCase):
def test_recompute_pass(self): def test_recompute_pass(self):
# mp2 training # mp2 training
mp_engine = self.get_engine() mp_engine = self.get_engine()
outs = mp_engine.fit(self.dataset, 3, batch_size=self.batch_size) history = mp_engine.fit(self.dataset, 3, batch_size=self.batch_size)
mp_losses = np.array(outs["loss"]) mp_losses = np.array(history.history["loss"])
# mp2 recompute training # mp2 recompute training
rc_engine = self.get_engine(True) rc_engine = self.get_engine(True)
outs = rc_engine.fit(self.dataset, 3, batch_size=self.batch_size) history = rc_engine.fit(self.dataset, 3, batch_size=self.batch_size)
rc_losses = np.array(outs["loss"]) rc_losses = np.array(history.history["loss"])
self.check_results(mp_losses, rc_losses) self.check_results(mp_losses, rc_losses)
......
...@@ -83,25 +83,31 @@ class TestShardingPass(unittest.TestCase): ...@@ -83,25 +83,31 @@ class TestShardingPass(unittest.TestCase):
def test_sharding_pass(self): def test_sharding_pass(self):
# dp2 training # dp2 training
dp_engine = self.get_engine() dp_engine = self.get_engine()
dp_losses = dp_engine.fit(self.dataset, 3, batch_size=self.batch_size) history = dp_engine.fit(self.dataset, 3, batch_size=self.batch_size)
dp_losses = np.array(dp_losses["loss"]) dp_losses = np.array(history.history["loss"])
# sharding2 stage1 training # sharding2 stage1 training
sharding1_engine = self.get_engine(True, 1) sharding1_engine = self.get_engine(True, 1)
outs = sharding1_engine.fit(self.dataset, 3, batch_size=self.batch_size) history = sharding1_engine.fit(self.dataset,
sharding1_losses = np.array(outs["loss"]) 3,
batch_size=self.batch_size)
sharding1_losses = np.array(history.history["loss"])
self.check_results(dp_losses, sharding1_losses) self.check_results(dp_losses, sharding1_losses)
# sharding2 stage2 training # sharding2 stage2 training
sharding2_engine = self.get_engine(True, 2) sharding2_engine = self.get_engine(True, 2)
outs = sharding2_engine.fit(self.dataset, 3, batch_size=self.batch_size) history = sharding2_engine.fit(self.dataset,
sharding2_losses = np.array(outs["loss"]) 3,
batch_size=self.batch_size)
sharding2_losses = np.array(history.history["loss"])
self.check_results(dp_losses, sharding2_losses) self.check_results(dp_losses, sharding2_losses)
# sharding2 stage3 training # sharding2 stage3 training
sharding3_engine = self.get_engine(True, 3) sharding3_engine = self.get_engine(True, 3)
outs = sharding3_engine.fit(self.dataset, 3, batch_size=self.batch_size) history = sharding3_engine.fit(self.dataset,
sharding3_losses = np.array(outs["loss"]) 3,
batch_size=self.batch_size)
sharding3_losses = np.array(history.history["loss"])
self.check_results(dp_losses, sharding3_losses) self.check_results(dp_losses, sharding3_losses)
......
...@@ -195,7 +195,7 @@ class TestDistributedContext(unittest.TestCase): ...@@ -195,7 +195,7 @@ class TestDistributedContext(unittest.TestCase):
"_serial_ordered_nodes", "_serial_ordered_tensor_nodes", \ "_serial_ordered_nodes", "_serial_ordered_tensor_nodes", \
"_serial_ordered_op_nodes", "_original_serial_loss", \ "_serial_ordered_op_nodes", "_original_serial_loss", \
"_original_serial_feed_vars", "_original_serial_fetch_vars", \ "_original_serial_feed_vars", "_original_serial_fetch_vars", \
"_serial_loss", "_serial_feed_vars", "_serial_fetch_vars", "_lr_optimizer", \ "_serial_loss", "_serial_feed_vars", "_serial_fetch_vars", "_serial_optimizer", \
"_backup_serial_main_program_stack", "_backup_serial_startup_program_stack", \ "_backup_serial_main_program_stack", "_backup_serial_startup_program_stack", \
"_pass_context"] "_pass_context"]
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import tempfile
import shutil
import time
import random
import paddle
import paddle.vision.transforms as T
from paddle.static import InputSpec
from paddle.distributed.fleet import auto
from paddle.distributed.auto_parallel.callbacks import config_callbacks
from paddle.vision.models import LeNet
from paddle.vision.datasets import MNIST
paddle.enable_static()
class TestCallbacks(unittest.TestCase):
    """Drives config_callbacks through hand-rolled train/eval/predict loops."""

    def setUp(self):
        self.save_dir = tempfile.mkdtemp()

    def tearDown(self):
        shutil.rmtree(self.save_dir)

    def run_callback(self):
        epochs, steps = 2, 5
        freq, eval_steps = 2, 2

        inputs_spec = [InputSpec([None, 1, 28, 28], 'float32', 'image')]

        strategy = auto.Strategy()
        strategy.auto_mode = "semi"
        engine = auto.Engine(LeNet(), strategy=strategy)
        engine.prepare(inputs_spec, mode="predict")

        cbks = config_callbacks(engine=engine,
                                batch_size=128,
                                epochs=epochs,
                                steps=steps,
                                log_freq=freq,
                                verbose=self.verbose,
                                metrics=['loss', 'acc'],
                                save_dir=self.save_dir)

        # --- simulated training phase ---
        cbks.on_begin('train')
        logs = {'loss': 50.341673, 'acc': 0.00256}
        for epoch in range(epochs):
            cbks.on_epoch_begin(epoch)
            for step in range(steps):
                cbks.on_batch_begin('train', step, logs)
                logs['loss'] -= random.random() * 0.1
                logs['acc'] += random.random() * 0.1
                time.sleep(0.005)
                cbks.on_batch_end('train', step, logs)
            cbks.on_epoch_end(epoch, logs)

        # --- simulated evaluation phase ---
        eval_logs = {'eval_loss': 20.341673, 'eval_acc': 0.256}
        cbks.on_begin('eval', {
            'steps': eval_steps,
            'metrics': ['eval_loss', 'eval_acc'],
        })
        for step in range(eval_steps):
            cbks.on_batch_begin('eval', step, eval_logs)
            eval_logs['eval_loss'] -= random.random() * 0.1
            eval_logs['eval_acc'] += random.random() * 0.1
            eval_logs['batch_size'] = 2
            time.sleep(0.005)
            cbks.on_batch_end('eval', step, eval_logs)
        cbks.on_end('eval', eval_logs)

        # --- simulated prediction phase ---
        test_logs = {}
        cbks.on_begin('predict', {'steps': eval_steps})
        for step in range(eval_steps):
            cbks.on_batch_begin('predict', step, test_logs)
            test_logs['batch_size'] = 2
            time.sleep(0.005)
            cbks.on_batch_end('predict', step, test_logs)
        cbks.on_end('predict', test_logs)

        cbks.on_end('train')
        print(engine.history.history)

    def test_callback_verbose_0(self):
        self.verbose = 0
        self.run_callback()

    def test_callback_verbose_1(self):
        self.verbose = 1
        self.run_callback()

    def test_callback_verbose_2(self):
        self.verbose = 2
        self.run_callback()

    def test_callback_verbose_3(self):
        self.verbose = 3
        self.run_callback()
class TestCallbacksEngine(unittest.TestCase):
    """End-to-end fit/evaluate/predict on MNIST through the auto Engine."""

    def setUp(self):
        self.save_dir = tempfile.mkdtemp()
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        self.train_dataset = MNIST(mode='train', transform=transform)
        self.test_dataset = MNIST(mode='test', transform=transform)
        self.prepare_engine()

    def tearDown(self):
        shutil.rmtree(self.save_dir)

    def prepare_engine(self):
        """Build self.engine: LeNet + cross-entropy + Adam with piecewise LR."""
        model = paddle.vision.models.LeNet()
        loss = paddle.nn.CrossEntropyLoss()
        base_lr = 1e-3
        boundaries = [5, 8]
        # One learning-rate value per decay segment: base_lr, base_lr*0.1, ...
        values = [base_lr * (0.1**decay) for decay in range(len(boundaries) + 1)]
        lr = paddle.optimizer.lr.PiecewiseDecay(boundaries=boundaries,
                                                values=values,
                                                verbose=False)
        optimizer = paddle.optimizer.Adam(learning_rate=lr,
                                          parameters=model.parameters())
        # Register the first parameter as a logged fetch target.
        auto.fetch(model.parameters()[0], "param0", logging=True)
        metrics = paddle.metric.Accuracy(topk=(1, 2))
        self.engine = auto.Engine(model, loss, optimizer, metrics)

    def test_fit_eval(self):
        history = self.engine.fit(train_data=self.train_dataset,
                                  valid_data=self.test_dataset,
                                  batch_size=128,
                                  steps_per_epoch=60,
                                  valid_steps=40,
                                  log_freq=20,
                                  save_dir=self.save_dir,
                                  save_freq=1)
        print(history.history)

    def test_eval(self):
        self.engine.evaluate(valid_data=self.test_dataset,
                             batch_size=128,
                             steps=40,
                             log_freq=10)

    def test_predict(self):
        # A user-supplied ProgBarLogger must be accepted alongside defaults.
        progbar = paddle.callbacks.ProgBarLogger()
        self.engine.predict(test_data=self.test_dataset,
                            batch_size=128,
                            callbacks=[progbar])
# Allow running this test file directly as a script.
if __name__ == '__main__':
    unittest.main()
...@@ -68,7 +68,7 @@ class TestLRScheduler(TestEngineBase): ...@@ -68,7 +68,7 @@ class TestLRScheduler(TestEngineBase):
def test_lr_scheduler(self): def test_lr_scheduler(self):
self.init_engine() self.init_engine()
self.engine.fit(self.dataset, batch_size=self.batch_size) self.engine.fit(self.dataset, batch_size=self.batch_size)
lr = self.engine._lr_optimizer._learning_rate lr = self.engine._optimizer._learning_rate
assert isinstance(lr, paddle.optimizer.lr.LRScheduler) assert isinstance(lr, paddle.optimizer.lr.LRScheduler)
......
...@@ -20,7 +20,7 @@ import warnings ...@@ -20,7 +20,7 @@ import warnings
import numpy as np import numpy as np
import paddle import paddle
from paddle.distributed import ParallelEnv from paddle.fluid.dygraph.parallel import ParallelEnv
from paddle.utils import try_import from paddle.utils import try_import
from .progressbar import ProgressBar from .progressbar import ProgressBar
......
...@@ -46,6 +46,7 @@ from paddle.static import InputSpec as Input ...@@ -46,6 +46,7 @@ from paddle.static import InputSpec as Input
import paddle.distributed as dist import paddle.distributed as dist
import paddle.distributed.fleet as fleet import paddle.distributed.fleet as fleet
from paddle.distributed.fleet.base import role_maker from paddle.distributed.fleet.base import role_maker
from paddle.autograd import no_grad
from .callbacks import config_callbacks, EarlyStopping from .callbacks import config_callbacks, EarlyStopping
from .model_summary import summary from .model_summary import summary
...@@ -1099,7 +1100,7 @@ class Model(object): ...@@ -1099,7 +1100,7 @@ class Model(object):
self._update_inputs() self._update_inputs()
return loss return loss
@paddle.no_grad() @no_grad()
def eval_batch(self, inputs, labels=None): def eval_batch(self, inputs, labels=None):
""" """
Run one evaluating step on a batch of data. Run one evaluating step on a batch of data.
...@@ -1151,7 +1152,7 @@ class Model(object): ...@@ -1151,7 +1152,7 @@ class Model(object):
self._update_inputs() self._update_inputs()
return loss return loss
@paddle.no_grad() @no_grad()
def predict_batch(self, inputs): def predict_batch(self, inputs):
""" """
Run one predicting step on a batch of data. Run one predicting step on a batch of data.
......
...@@ -19,7 +19,7 @@ import numbers ...@@ -19,7 +19,7 @@ import numbers
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
from paddle.static import InputSpec from paddle.static import InputSpec
from paddle.autograd import no_grad
from collections import OrderedDict from collections import OrderedDict
__all__ = [] __all__ = []
...@@ -229,7 +229,7 @@ def summary(net, input_size=None, dtypes=None, input=None): ...@@ -229,7 +229,7 @@ def summary(net, input_size=None, dtypes=None, input=None):
return params_info return params_info
@paddle.no_grad() @no_grad()
def summary_string(model, input_size=None, dtypes=None, input=None): def summary_string(model, input_size=None, dtypes=None, input=None):
def _all_is_numper(items): def _all_is_numper(items):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册