未验证 提交 6b54ae80 编写于 作者: K Kaipeng Deng 提交者: GitHub

fix download hang (#3881)

上级 f03283c4
...@@ -409,7 +409,7 @@ def _download_dist(url, path, md5sum=None): ...@@ -409,7 +409,7 @@ def _download_dist(url, path, md5sum=None):
os.remove(lock_path) os.remove(lock_path)
else: else:
while os.path.exists(lock_path): while os.path.exists(lock_path):
time.sleep(1) time.sleep(0.5)
return fullname return fullname
else: else:
return _download(url, path, md5sum) return _download(url, path, md5sum)
...@@ -504,14 +504,26 @@ def _decompress_dist(fname): ...@@ -504,14 +504,26 @@ def _decompress_dist(fname):
from paddle.distributed import ParallelEnv from paddle.distributed import ParallelEnv
unique_endpoints = _get_unique_endpoints(ParallelEnv() unique_endpoints = _get_unique_endpoints(ParallelEnv()
.trainer_endpoints[:]) .trainer_endpoints[:])
# NOTE(dkp): _decompress_dist is always performed after
# _download_dist. In _download_dist, sub-trainers wait for the
# download lock file to be released by sleeping. If the decompress
# progress is very fast and finishes within the sleeping gap
# time, e.g. on tiny datasets such as coco_ce or spine_coco, the main
# trainer may finish decompressing and release the lock file before
# a sub-trainer arrives; if that sub-trainer then created the lock
# file itself, nobody would remove it and it would wait forever. So we
# only create the lock file in the main trainer, and all sub-trainers
# wait 1s for the main trainer to create the lock file. Since 1s is
# twice the sleeping gap, this waiting time keeps all
# trainer pipelines in order.
# **change this if you have a more elegant method**
if ParallelEnv().current_endpoint in unique_endpoints:
with open(lock_path, 'w'): # touch with open(lock_path, 'w'): # touch
os.utime(lock_path, None) os.utime(lock_path, None)
if ParallelEnv().current_endpoint in unique_endpoints:
_decompress(fname) _decompress(fname)
os.remove(lock_path) os.remove(lock_path)
else: else:
while os.path.exists(lock_path):
time.sleep(1) time.sleep(1)
while os.path.exists(lock_path):
time.sleep(0.5)
else: else:
_decompress(fname) _decompress(fname)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册