未验证 提交 9210b1af 编写于 作者: R Ruibiao Chen 提交者: GitHub

Support nvprof for auto parallel (#55347)

* Support nvprof for auto parallel

* Fix CI errors

* Fix CI errors
上级 65950324
...@@ -109,6 +109,11 @@ paddle::framework::FetchList StandaloneExecutor::Run( ...@@ -109,6 +109,11 @@ paddle::framework::FetchList StandaloneExecutor::Run(
const auto& job = jobs[job_idx]; const auto& job = jobs[job_idx];
const std::string& job_type = job->Type(); const std::string& job_type = job->Type();
platform::RecordEvent record_event(
job_type + "-" + std::to_string(job->MicroBatchId()),
platform::TracerEventType::UserDefined,
1);
VLOG(6) << "Run job (" << job_idx << "), type = " << job_type VLOG(6) << "Run job (" << job_idx << "), type = " << job_type
<< ", micro_batch_id =" << job->MicroBatchId(); << ", micro_batch_id =" << job->MicroBatchId();
......
...@@ -868,6 +868,7 @@ class Engine: ...@@ -868,6 +868,7 @@ class Engine:
collate_fn=None, collate_fn=None,
callbacks=None, callbacks=None,
verbose=2, verbose=2,
nvprof_range=[-1, -1],
): ):
""" """
Trains the model for a fixed number of epochs. If `valid_data` is set, Trains the model for a fixed number of epochs. If `valid_data` is set,
...@@ -905,6 +906,7 @@ class Engine: ...@@ -905,6 +906,7 @@ class Engine:
0. Default None. 0. Default None.
callbacks (Callback|None, optional): A list of `Callback` instances to apply callbacks (Callback|None, optional): A list of `Callback` instances to apply
during training. Default: None. (Unused for now) during training. Default: None. (Unused for now)
            nvprof_range(list, optional): A list of two integers indicating the nvprof range, in the form [start_step, end_step]. Note that if start_step >= end_step, nvprof profiling will not be applied.
Returns: Returns:
None None
...@@ -975,28 +977,32 @@ class Engine: ...@@ -975,28 +977,32 @@ class Engine:
for epoch in range(epochs): for epoch in range(epochs):
logs = {} logs = {}
cbks.on_epoch_begin(epoch) cbks.on_epoch_begin(epoch)
for step, _ in enumerate(train_dataloader): for step, _ in enumerate(train_dataloader):
cbks.on_batch_begin('train', step, logs) with paddle.profiler.utils._nvprof_range(
try: iter_id=step, start=nvprof_range[0], end=nvprof_range[1]
outs = self._executor.run( ):
self.main_program, cbks.on_batch_begin('train', step, logs)
fetch_list=fetch_names, try:
use_program_cache=self._strategy.use_cache, outs = self._executor.run(
return_numpy=self._strategy.return_numpy, self.main_program,
fetch_list=fetch_names,
use_program_cache=self._strategy.use_cache,
return_numpy=self._strategy.return_numpy,
)
except core.EOFException:
break
lr = auto_utils.get_lr(self.optimizer)
logs = self._prepare_logger(
outs,
epoch,
step,
lr,
fetch_names,
fetch_indices,
self._mode,
) )
except core.EOFException: cbks.on_batch_end('train', step, logs)
break
lr = auto_utils.get_lr(self.optimizer)
logs = self._prepare_logger(
outs,
epoch,
step,
lr,
fetch_names,
fetch_indices,
self._mode,
)
cbks.on_batch_end('train', step, logs)
if valid_data and (epoch + 1) % valid_freq == 0: if valid_data and (epoch + 1) % valid_freq == 0:
val_logs = self.evaluate( val_logs = self.evaluate(
......
...@@ -13,7 +13,8 @@ ...@@ -13,7 +13,8 @@
# limitations under the License. # limitations under the License.
import functools import functools
from contextlib import ContextDecorator import sys
from contextlib import ContextDecorator, contextmanager
from typing import Any from typing import Any
from warnings import warn from warnings import warn
...@@ -192,3 +193,34 @@ def wrap_optimizers(): ...@@ -192,3 +193,34 @@ def wrap_optimizers():
if getattr(classobject, 'step', None) is not None: if getattr(classobject, 'step', None) is not None:
classobject.step = optimizer_warpper(classobject.step) classobject.step = optimizer_warpper(classobject.step)
_has_optimizer_wrapped = True _has_optimizer_wrapped = True
@contextmanager
def _nvprof_range(iter_id, start, end, exit_after_prof=True):
'''
A range profiler interface (not public yet).
Examples:
.. code-block:: python
model = Model()
for i in range(max_iter):
paddle.fluid.profiler._nvprof_range(i, 10, 20):
out = model(in)
'''
if start >= end:
yield
return
try:
if iter_id == start:
core.nvprof_start()
core.nvprof_enable_record_event()
if iter_id >= start:
core.nvprof_nvtx_push(str(iter_id))
yield
finally:
if iter_id < end:
core.nvprof_nvtx_pop()
if iter_id == end - 1:
core.nvprof_stop()
if exit_after_prof:
sys.exit()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册