Unverified · Commit 9210b1af authored by Ruibiao Chen, committed by GitHub

Support nvprof for auto parallel (#55347)

* Support nvprof for auto parallel

* Fix CI errors

* Fix CI errors
Parent 65950324
......@@ -109,6 +109,11 @@ paddle::framework::FetchList StandaloneExecutor::Run(
     const auto& job = jobs[job_idx];
     const std::string& job_type = job->Type();
+    platform::RecordEvent record_event(
+        job_type + "-" + std::to_string(job->MicroBatchId()),
+        platform::TracerEventType::UserDefined,
+        1);
     VLOG(6) << "Run job (" << job_idx << "), type = " << job_type
             << ", micro_batch_id =" << job->MicroBatchId();
......
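The RecordEvent added above labels each job run as job_type + "-" + micro_batch_id, so every micro-batch job shows up as a named user-defined range in Paddle's profiler timeline. For reference, a minimal Python-side sketch of the same mechanism via the public paddle.profiler API (the tensor and range name are illustrative, not part of this change):

    import paddle
    import paddle.profiler as profiler

    x = paddle.randn([8, 8])
    prof = profiler.Profiler(targets=[profiler.ProfilerTarget.CPU])
    prof.start()
    # Analogous to the C++ platform::RecordEvent above: the wrapped
    # region is recorded as a user-defined event.
    with profiler.RecordEvent("forward-0"):
        y = paddle.matmul(x, x)
    prof.stop()
    prof.summary()  # "forward-0" appears among the recorded events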
......@@ -868,6 +868,7 @@ class Engine:
         collate_fn=None,
         callbacks=None,
         verbose=2,
+        nvprof_range=[-1, -1],
     ):
"""
Trains the model for a fixed number of epochs. If `valid_data` is set,
......@@ -905,6 +906,7 @@ class Engine:
             0. Default None.
         callbacks (Callback|None, optional): A list of `Callback` instances to apply
             during training. Default: None. (Unused for now)
+        nvprof_range(list, optional): A list of two integers [start_step, end_step] specifying the range of training steps to profile with nvprof. Profiling is disabled if start_step >= end_step. Default: [-1, -1].
     Returns:
         None
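With the new argument, profiling a window of training steps might look like the following sketch (model, loss, optimizer, and train_dataset are placeholders; auto.Engine is the auto-parallel engine this method belongs to):

    from paddle.distributed.fleet import auto

    # Placeholders: any model/loss/optimizer/dataset accepted by auto.Engine.
    engine = auto.Engine(model, loss, optimizer)
    engine.fit(
        train_dataset,
        epochs=1,
        batch_size=64,
        nvprof_range=[10, 20],  # profile steps 10..19; the process exits afterwards
    )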
......@@ -975,28 +977,32 @@ class Engine:
         for epoch in range(epochs):
             logs = {}
             cbks.on_epoch_begin(epoch)
             for step, _ in enumerate(train_dataloader):
-                cbks.on_batch_begin('train', step, logs)
-                try:
-                    outs = self._executor.run(
-                        self.main_program,
-                        fetch_list=fetch_names,
-                        use_program_cache=self._strategy.use_cache,
-                        return_numpy=self._strategy.return_numpy,
-                    )
-                except core.EOFException:
-                    break
-                lr = auto_utils.get_lr(self.optimizer)
-                logs = self._prepare_logger(
-                    outs,
-                    epoch,
-                    step,
-                    lr,
-                    fetch_names,
-                    fetch_indices,
-                    self._mode,
-                )
-                cbks.on_batch_end('train', step, logs)
+                with paddle.profiler.utils._nvprof_range(
+                    iter_id=step, start=nvprof_range[0], end=nvprof_range[1]
+                ):
+                    cbks.on_batch_begin('train', step, logs)
+                    try:
+                        outs = self._executor.run(
+                            self.main_program,
+                            fetch_list=fetch_names,
+                            use_program_cache=self._strategy.use_cache,
+                            return_numpy=self._strategy.return_numpy,
+                        )
+                    except core.EOFException:
+                        break
+                    lr = auto_utils.get_lr(self.optimizer)
+                    logs = self._prepare_logger(
+                        outs,
+                        epoch,
+                        step,
+                        lr,
+                        fetch_names,
+                        fetch_indices,
+                        self._mode,
+                    )
+                    cbks.on_batch_end('train', step, logs)
             if valid_data and (epoch + 1) % valid_freq == 0:
                 val_logs = self.evaluate(
......
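Since the whole step body (callbacks, executor run, logging) now executes inside _nvprof_range, each step in [start_step, end_step) is enclosed in an NVTX range named after its step index. A hedged standalone sketch of the same pattern (run_one_step is a placeholder; the script would be launched under a CUDA profiler, e.g. nsys profile python train.py):

    import paddle

    def run_one_step():
        pass  # placeholder for a real training step

    for step in range(30):
        # Steps 10..19 are wrapped in NVTX ranges "10".."19";
        # by default the process exits after step 19.
        with paddle.profiler.utils._nvprof_range(step, 10, 20):
            run_one_step()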
......@@ -13,7 +13,8 @@
 # limitations under the License.
 import functools
-from contextlib import ContextDecorator
+import sys
+from contextlib import ContextDecorator, contextmanager
 from typing import Any
 from warnings import warn
......@@ -192,3 +193,34 @@ def wrap_optimizers():
         if getattr(classobject, 'step', None) is not None:
             classobject.step = optimizer_warpper(classobject.step)
     _has_optimizer_wrapped = True
+
+
+@contextmanager
+def _nvprof_range(iter_id, start, end, exit_after_prof=True):
+    '''
+    A range profiler interface (not public yet).
+
+    Examples:
+        .. code-block:: python
+
+            model = Model()
+            for i in range(max_iter):
+                with paddle.profiler.utils._nvprof_range(i, 10, 20):
+                    out = model(data)
+    '''
+    if start >= end:
+        yield
+        return
+
+    try:
+        if iter_id == start:
+            core.nvprof_start()
+            core.nvprof_enable_record_event()
+        if iter_id >= start:
+            core.nvprof_nvtx_push(str(iter_id))
+        yield
+    finally:
+        if iter_id < end:
+            core.nvprof_nvtx_pop()
+        if iter_id == end - 1:
+            core.nvprof_stop()
+            if exit_after_prof:
+                sys.exit()
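Tracing the control flow above for start=2, end=4 (a walk-through of the code as written, not new behavior):

    # iter 0, 1: push is skipped (iter_id < start), but the pop guard is
    #            iter_id < end, so nvprof_nvtx_pop() still runs without a
    #            matching push (NVTX tolerates popping an empty range stack).
    # iter 2   : nvprof_start() + nvprof_enable_record_event(), then
    #            nvprof_nvtx_push("2") ... nvprof_nvtx_pop()
    # iter 3   : nvprof_nvtx_push("3") ... nvprof_nvtx_pop(); since
    #            iter_id == end - 1, nvprof_stop() runs and, with
    #            exit_after_prof=True, sys.exit() ends the process.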