提交 a898bb29 编写于 作者: C chenxuyi

fix multi-process download

上级 e731cd98
...@@ -21,7 +21,6 @@ import logging ...@@ -21,7 +21,6 @@ import logging
from tqdm import tqdm from tqdm import tqdm
from pathlib import Path from pathlib import Path
import six import six
import paddle as P
import time import time
if six.PY2: if six.PY2:
from pathlib2 import Path from pathlib2 import Path
...@@ -35,8 +34,6 @@ def _fetch_from_remote(url, ...@@ -35,8 +34,6 @@ def _fetch_from_remote(url,
force_download=False, force_download=False,
cached_dir='~/.paddle-ernie-cache'): cached_dir='~/.paddle-ernie-cache'):
import hashlib, tempfile, requests, tarfile import hashlib, tempfile, requests, tarfile
env = P.distributed.ParallelEnv()
sig = hashlib.md5(url.encode('utf8')).hexdigest() sig = hashlib.md5(url.encode('utf8')).hexdigest()
cached_dir = Path(cached_dir).expanduser() cached_dir = Path(cached_dir).expanduser()
try: try:
...@@ -44,33 +41,31 @@ def _fetch_from_remote(url, ...@@ -44,33 +41,31 @@ def _fetch_from_remote(url,
except OSError: except OSError:
pass pass
cached_dir_model = cached_dir / sig cached_dir_model = cached_dir / sig
done_file = cached_dir_model / 'fetch_done' from filelock import FileLock
if force_download or not done_file.exists(): with FileLock(str(cached_dir_model) + '.lock'):
if env.dev_id == 0: donefile = cached_dir_model / 'done'
cached_dir_model.mkdir() if (not force_download) and donefile.exists():
tmpfile = cached_dir_model / 'tmp' log.debug('%s cached in %s' % (url, cached_dir_model))
with tmpfile.open('wb') as f: return cached_dir_model
r = requests.get(url, stream=True) cached_dir_model.mkdir(exist_ok=True)
total_len = int(r.headers.get('content-length')) tmpfile = cached_dir_model / 'tmp'
for chunk in tqdm( with tmpfile.open('wb') as f:
r.iter_content(chunk_size=1024), r = requests.get(url, stream=True)
total=total_len // 1024, total_len = int(r.headers.get('content-length'))
desc='downloading %s' % url, for chunk in tqdm(
unit='KB'): r.iter_content(chunk_size=1024),
if chunk: total=total_len // 1024,
f.write(chunk) desc='downloading %s' % url,
f.flush() unit='KB'):
log.debug('extacting... to %s' % tmpfile) if chunk:
with tarfile.open(tmpfile.as_posix()) as tf: f.write(chunk)
tf.extractall(path=cached_dir_model.as_posix()) f.flush()
os.remove(tmpfile.as_posix()) log.debug('extacting... to %s' % tmpfile)
f = done_file.open('wb') with tarfile.open(tmpfile.as_posix()) as tf:
f.close() tf.extractall(path=str(cached_dir_model))
else: donefile.touch()
while not done_file.exists(): os.remove(tmpfile.as_posix())
time.sleep(1)
log.debug('%s cached in %s' % (url, cached_dir))
return cached_dir_model return cached_dir_model
......
...@@ -6,4 +6,5 @@ sentencepiece==0.1.8 ...@@ -6,4 +6,5 @@ sentencepiece==0.1.8
jieba==0.39 jieba==0.39
visualdl>=2.0.0b7 visualdl>=2.0.0b7
pathlib2>=2.3.2 pathlib2>=2.3.2
filelock>=3.0.0
tqdm>=4.32.2 tqdm>=4.32.2
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册