From 5e5b0bfbc37f22f4353ebf656b965f7655fe4087 Mon Sep 17 00:00:00 2001 From: wangxinxin08 <69842442+wangxinxin08@users.noreply.github.com> Date: Wed, 21 Sep 2022 16:48:00 +0800 Subject: [PATCH] fix mkdirs to avoid race problem (#6975) --- ppdet/utils/download.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/ppdet/utils/download.py b/ppdet/utils/download.py index 006733aa7..d7100cf39 100644 --- a/ppdet/utils/download.py +++ b/ppdet/utils/download.py @@ -29,6 +29,7 @@ import base64 import binascii import tarfile import zipfile +import errno from paddle.utils.download import _get_unique_endpoints from ppdet.core.workspace import BASE_KEY @@ -110,6 +111,20 @@ DOWNLOAD_RETRY_LIMIT = 3 PPDET_WEIGHTS_DOWNLOAD_URL_PREFIX = 'https://paddledet.bj.bcebos.com/' +# When running unit tests, there could be multiple processes that +# are trying to create the DATA_HOME directory simultaneously, so we cannot +# use an if condition to check for the existence of the directory; +# instead, we use the filesystem as the synchronization mechanism by +# catching the raised errors. +def must_mkdirs(path): + try: + os.makedirs(path) + except OSError as exc: + if exc.errno != errno.EEXIST: + raise + pass + + def parse_url(url): url = url.replace("ppdet://", PPDET_WEIGHTS_DOWNLOAD_URL_PREFIX) return url @@ -344,8 +359,7 @@ def _download(url, path, md5sum=None): url (str): download url path (str): download to given path """ - if not osp.exists(path): - os.makedirs(path) + must_mkdirs(path) fname = osp.split(url)[-1] fullname = osp.join(path, fname) @@ -407,8 +421,7 @@ def _download_dist(url, path, md5sum=None): fullname = osp.join(path, fname) lock_path = fullname + '.download.lock' - if not osp.isdir(path): - os.makedirs(path) + must_mkdirs(path) if not osp.exists(fullname): with open(lock_path, 'w'): # touch -- GitLab