From ee5430fb7df1a72f39b9e12cde030e00d8d2e57c Mon Sep 17 00:00:00 2001
From: risemeup1 <62429225+risemeup1@users.noreply.github.com>
Date: Mon, 12 Jun 2023 15:27:39 +0800
Subject: [PATCH] =?UTF-8?q?Replace=20python=20dependency=20library=20?=
 =?UTF-8?q?=E2=80=9Crequests=E2=80=9D=20with=20=E2=80=9Chttpx=E2=80=9D=20(?=
 =?UTF-8?q?#47660)=20(#54529)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* replace requests with httpx

* set timeout=3

* replace requests with httpx

* replace requests with httpx

* test

* replace requests with httpx

* test

* replace requests with httpx

* replace requests with httpx

* modify paddle_build.sh

* fix bug
---
 paddle/scripts/paddle_build.sh                    |  6 +--
 python/paddle/dataset/common.py                   | 50 +++++++++--------
 .../distributed/launch/utils/kv_client.py         | 10 ++--
 python/paddle/utils/download.py                   | 53 +++++++++----------
 python/requirements.txt                           |  2 +-
 .../test_post_training_quantization_mnist.py      | 50 ++++++++---------
 tools/CheckPRTemplate.py                          | 10 ++--
 tools/get_pr_ut.py                                |  4 +-
 tools/get_quick_disable_lt.py                     |  4 +-
 9 files changed, 98 insertions(+), 91 deletions(-)

diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh
index a67f7607d1a..7c3b1d1f1c2 100644
--- a/paddle/scripts/paddle_build.sh
+++ b/paddle/scripts/paddle_build.sh
@@ -907,7 +907,7 @@ function get_precision_ut_mac() {
     UT_list=$(ctest -N | awk -F ': ' '{print $2}' | sed '/^$/d' | sed '$d')
     precision_cases=""
     if [ ${PRECISION_TEST:-OFF} == "ON" ]; then
-        python3.7 $PADDLE_ROOT/tools/get_pr_ut.py
+        python $PADDLE_ROOT/tools/get_pr_ut.py
         if [[ -f "ut_list" ]]; then
             echo "PREC length: "`wc -l ut_list`
             precision_cases=`cat ut_list`
@@ -1395,7 +1395,7 @@ set -x
         precison_cases=""
         bash $PADDLE_ROOT/tools/check_added_ut.sh
         if [ ${PRECISION_TEST:-OFF} == "ON" ]; then
-            python3.7 $PADDLE_ROOT/tools/get_pr_ut.py
+            python $PADDLE_ROOT/tools/get_pr_ut.py
            if [[ -f "ut_list" ]]; then
                 set +x
                 echo "PREC length: "`wc -l ut_list`
@@ -2364,7 +2364,7 @@ set -x
         check_approvals_of_unittest 2
         ctest -N | awk -F ': ' '{print $2}' | sed '/^$/d' | sed '$d' > ${PADDLE_ROOT}/build/all_ut_list
         if [ ${PRECISION_TEST:-OFF} == "ON" ]; then
-            python3.7 $PADDLE_ROOT/tools/get_pr_ut.py
+            python $PADDLE_ROOT/tools/get_pr_ut.py
         fi
         if [ -a "$PADDLE_ROOT/duplicate_ut" ];then
             duplicate_uts=$(cat $PADDLE_ROOT/duplicate_ut|sed -e 's/\r//g')
diff --git a/python/paddle/dataset/common.py b/python/paddle/dataset/common.py
index bab6c6f9e74..c7a6d1b0b19 100644
--- a/python/paddle/dataset/common.py
+++ b/python/paddle/dataset/common.py
@@ -22,7 +22,7 @@ import shutil
 import sys
 import tempfile
 
-import requests
+import httpx
 
 import paddle
 import paddle.dataset
@@ -100,28 +100,32 @@ def download(url, module_name, md5sum, save_name=None):
         )
         sys.stderr.write("Begin to download\n")
         try:
-            r = requests.get(url, stream=True)
-            total_length = r.headers.get('content-length')
-
-            if total_length is None:
-                with open(filename, 'wb') as f:
-                    shutil.copyfileobj(r.raw, f)
-            else:
-                with open(filename, 'wb') as f:
-                    chunk_size = 4096
-                    total_length = int(total_length)
-                    total_iter = total_length / chunk_size + 1
-                    log_interval = total_iter // 20 if total_iter > 20 else 1
-                    log_index = 0
-                    bar = paddle.hapi.progressbar.ProgressBar(
-                        total_iter, name='item'
-                    )
-                    for data in r.iter_content(chunk_size=chunk_size):
-                        f.write(data)
-                        log_index += 1
-                        bar.update(log_index, {})
-                        if log_index % log_interval == 0:
-                            bar.update(log_index)
+            # (risemeup1):use httpx to replace requests
+            with httpx.stream(
+                "GET", url, timeout=None, follow_redirects=True
+            ) as r:
+                total_length = r.headers.get('content-length')
+
+                if total_length is None:
+                    with open(filename, 'wb') as f:
+                        shutil.copyfileobj(r.raw, f)
+                else:
+                    with open(filename, 'wb') as f:
+                        chunk_size = 4096
+                        total_length = int(total_length)
+                        total_iter = total_length / chunk_size + 1
+                        log_interval = (
+                            total_iter // 20 if total_iter > 20 else 1
+                        )
+                        log_index = 0
+                        bar = paddle.hapi.progressbar.ProgressBar(
+                            total_iter, name='item'
+                        )
+                        for data in r.iter_bytes(chunk_size=chunk_size):
+                            f.write(data)
+                            log_index += 1
+                            bar.update(log_index, {})
+                            if log_index % log_interval == 0:
+                                bar.update(log_index)
         except Exception as e:
             # re-try
diff --git a/python/paddle/distributed/launch/utils/kv_client.py b/python/paddle/distributed/launch/utils/kv_client.py
index 3da25401c92..d4cfd0cbfb6 100644
--- a/python/paddle/distributed/launch/utils/kv_client.py
+++ b/python/paddle/distributed/launch/utils/kv_client.py
@@ -14,7 +14,7 @@
 
 import time
 
-import requests
+import httpx
 
 
 class KVClient:
@@ -27,7 +27,7 @@ class KVClient:
         key = key if key.startswith('/') else f"/{key}"
         u = f"{self.endpoint}{key}"
         try:
-            r = requests.post(u, data=value, timeout=3)
+            r = httpx.post(u, data=value, timeout=None, follow_redirects=True)
             if r.status_code == 200:
                 return True
             else:
@@ -39,7 +39,7 @@ class KVClient:
         key = key if key.startswith('/') else f"/{key}"
         u = f"{self.endpoint}{key}"
         try:
-            r = requests.get(u, timeout=3)
+            r = httpx.get(u, timeout=None, follow_redirects=True)
             if r.status_code == 200:
                 ret = r.json()
                 return ret.get(key, '')
@@ -52,7 +52,7 @@ class KVClient:
         key = key if key.startswith('/') else f"/{key}"
         u = f"{self.endpoint}{key}"
         try:
-            r = requests.get(u, timeout=3)
+            r = httpx.get(u, timeout=None, follow_redirects=True)
             if r.status_code == 200:
                 return r.json()
         except:
@@ -62,7 +62,7 @@ class KVClient:
         key = key if key.startswith('/') else f"/{key}"
         u = f"{self.endpoint}{key}"
         try:
-            r = requests.delete(u, timeout=3)
+            r = httpx.delete(u, timeout=None, follow_redirects=True)
             if r.status_code == 200:
                 return True
             else:
diff --git a/python/paddle/utils/download.py b/python/paddle/utils/download.py
index c7551945f5c..e8f73aaa71b 100644
--- a/python/paddle/utils/download.py
+++ b/python/paddle/utils/download.py
@@ -22,7 +22,7 @@ import tarfile
 import time
 import zipfile
 
-import requests
+import httpx
 
 try:
     from tqdm import tqdm
@@ -167,7 +167,30 @@ def _get_download(url, fullname):
     # using requests.get method
     fname = osp.basename(fullname)
     try:
-        req = requests.get(url, stream=True)
+        with httpx.stream(
+            "GET", url, timeout=None, follow_redirects=True
+        ) as req:
+            if req.status_code != 200:
+                raise RuntimeError(
+                    "Downloading from {} failed with code "
+                    "{}!".format(url, req.status_code)
+                )
+
+            tmp_fullname = fullname + "_tmp"
+            total_size = req.headers.get('content-length')
+            with open(tmp_fullname, 'wb') as f:
+                if total_size:
+                    with tqdm(total=(int(total_size) + 1023) // 1024) as pbar:
+                        for chunk in req.iter_bytes(chunk_size=1024):
+                            f.write(chunk)
+                            pbar.update(1)
+                else:
+                    for chunk in req.iter_bytes(chunk_size=1024):
+                        if chunk:
+                            f.write(chunk)
+            shutil.move(tmp_fullname, fullname)
+            return fullname
+
     except Exception as e:  # requests.exceptions.ConnectionError
         logger.info(
             "Downloading {} from {} failed with exception {}".format(
@@ -176,31 +199,6 @@ def _get_download(url, fullname):
         )
         return False
 
-    if req.status_code != 200:
-        raise RuntimeError(
-            "Downloading from {} failed with code "
-            "{}!".format(url, req.status_code)
-        )
-
-    # For protecting download interrupted, download to
-    # tmp_fullname firstly, move tmp_fullname to fullname
-    # after download finished
-    tmp_fullname = fullname + "_tmp"
-    total_size = req.headers.get('content-length')
-    with open(tmp_fullname, 'wb') as f:
-        if total_size:
-            with tqdm(total=(int(total_size) + 1023) // 1024) as pbar:
-                for chunk in req.iter_content(chunk_size=1024):
-                    f.write(chunk)
-                    pbar.update(1)
-        else:
-            for chunk in req.iter_content(chunk_size=1024):
-                if chunk:
-                    f.write(chunk)
-    shutil.move(tmp_fullname, fullname)
-
-    return fullname
-
 
 def _wget_download(url, fullname):
     # using wget to download url
@@ -255,6 +253,7 @@ def _download(url, path, md5sum=None, method='get'):
     logger.info(f"Downloading {fname} from {url}")
 
     while not (osp.exists(fullname) and _md5check(fullname, md5sum)):
+        logger.info(f"md5check {fullname} and {md5sum}")
         if retry_cnt < DOWNLOAD_RETRY_LIMIT:
             retry_cnt += 1
         else:
diff --git a/python/requirements.txt b/python/requirements.txt
index afb2b120075..499a27d7976 100644
--- a/python/requirements.txt
+++ b/python/requirements.txt
@@ -1,4 +1,4 @@
-requests>=2.20.0
+httpx
 numpy>=1.13
 protobuf>=3.20.2 ; platform_system != "Windows"
 protobuf>=3.1.0, <=3.20.2 ; platform_system == "Windows"
diff --git a/test/quantization/test_post_training_quantization_mnist.py b/test/quantization/test_post_training_quantization_mnist.py
index 1cdec57bf0b..4342e938d00 100644
--- a/test/quantization/test_post_training_quantization_mnist.py
+++ b/test/quantization/test_post_training_quantization_mnist.py
@@ -64,7 +64,7 @@ class TestPostTrainingQuantization(unittest.TestCase):
     def download(self, url, dirname, md5sum, save_name=None):
         import shutil
 
-        import requests
+        import httpx
 
         filename = os.path.join(
             dirname, url.split('/')[-1] if save_name is None else save_name
@@ -91,30 +91,30 @@ class TestPostTrainingQuantization(unittest.TestCase):
             )
             sys.stderr.write("Begin to download\n")
             try:
-                r = requests.get(url, stream=True)
-                total_length = r.headers.get('content-length')
-
-                if total_length is None:
-                    with open(filename, 'wb') as f:
-                        shutil.copyfileobj(r.raw, f)
-                else:
-                    with open(filename, 'wb') as f:
-                        chunk_size = 4096
-                        total_length = int(total_length)
-                        total_iter = total_length / chunk_size + 1
-                        log_interval = (
-                            total_iter // 20 if total_iter > 20 else 1
-                        )
-                        log_index = 0
-                        bar = paddle.hapi.progressbar.ProgressBar(
-                            total_iter, name='item'
-                        )
-                        for data in r.iter_content(chunk_size=chunk_size):
-                            f.write(data)
-                            log_index += 1
-                            bar.update(log_index, {})
-                            if log_index % log_interval == 0:
-                                bar.update(log_index)
+                with httpx.stream("GET", url) as r:
+                    total_length = r.headers.get('content-length')
+
+                    if total_length is None:
+                        with open(filename, 'wb') as f:
+                            shutil.copyfileobj(r.raw, f)
+                    else:
+                        with open(filename, 'wb') as f:
+                            chunk_size = 4096
+                            total_length = int(total_length)
+                            total_iter = total_length / chunk_size + 1
+                            log_interval = (
+                                total_iter // 20 if total_iter > 20 else 1
+                            )
+                            log_index = 0
+                            bar = paddle.hapi.progressbar.ProgressBar(
+                                total_iter, name='item'
+                            )
+                            for data in r.iter_bytes(chunk_size=chunk_size):
+                                f.write(data)
+                                log_index += 1
+                                bar.update(log_index, {})
+                                if log_index % log_interval == 0:
+                                    bar.update(log_index)
             except Exception as e:
                 # re-try
diff --git a/tools/CheckPRTemplate.py b/tools/CheckPRTemplate.py
index 3ba32deadeb..6da19fc5ab1 100644
--- a/tools/CheckPRTemplate.py
+++ b/tools/CheckPRTemplate.py
@@ -16,7 +16,7 @@ import os
 import re
 import sys
 
-import requests
+import httpx
 
 PR_checkTemplate = ['Paddle']
 
@@ -73,7 +73,9 @@ def checkComments(url):
     headers = {
         'Authorization': 'token ' + GITHUB_API_TOKEN,
     }
-    response = requests.get(url, headers=headers).json()
+    response = httpx.get(
+        url, headers=headers, timeout=None, follow_redirects=True
+    ).json()
     return response
 
 
@@ -138,7 +140,9 @@ def get_a_pull(pull_id):
         'Authorization': 'token ' + GITHUB_API_TOKEN,
         'Accept': 'application/vnd.github+json',
     }
-    response = requests.request("GET", url, headers=headers, data=payload)
+    response = httpx.request(
+        "GET", url, headers=headers, data=payload, follow_redirects=True
+    )
     return response.json()
 
 
diff --git a/tools/get_pr_ut.py b/tools/get_pr_ut.py
index 068d2fcb9ba..a61814a1193 100644
--- a/tools/get_pr_ut.py
+++ b/tools/get_pr_ut.py
@@ -23,7 +23,7 @@ import sys
 import time
 import urllib.request
 
-import requests
+import httpx
 from github import Github
 
 PADDLE_ROOT = os.getenv('PADDLE_ROOT', '/paddle/')
@@ -217,7 +217,7 @@ class PRChecker:
 
     def get_pr_diff_lines(self):
         file_to_diff_lines = {}
-        r = requests.get(self.pr.diff_url)
+        r = httpx.get(self.pr.diff_url, timeout=None, follow_redirects=True)
         data = r.text
         data = data.split('\n')
         ix = 0
diff --git a/tools/get_quick_disable_lt.py b/tools/get_quick_disable_lt.py
index 2fe86a66071..6a51771a6d3 100644
--- a/tools/get_quick_disable_lt.py
+++ b/tools/get_quick_disable_lt.py
@@ -15,7 +15,7 @@ import ssl
 import sys
 
-import requests
+import httpx
 
 import paddle
 
@@ -32,7 +32,7 @@ def download_file():
     if paddle.is_compiled_with_rocm():
         url = "https://sys-p0.bj.bcebos.com/prec/{}".format('disable_ut_rocm')
 
-    f = requests.get(url)
+    f = httpx.get(url, timeout=None, follow_redirects=True)
    data = f.text
     status_code = f.status_code
     if len(data.strip()) == 0 or status_code != 200:
-- 
GitLab
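
Note (not part of the patch): every hunk above applies the same translation. requests.get(url, stream=True) plus iter_content() becomes httpx.stream("GET", url) plus iter_bytes(), and because httpx, unlike requests, enforces a default timeout and does not follow redirects unless asked to, the call sites pass timeout=None and follow_redirects=True explicitly. The sketch below shows that pattern in isolation; the helper name fetch_to_file and the example URL are made up for illustration and do not appear in the patch.

import httpx


def fetch_to_file(url, filename, chunk_size=4096):
    # httpx applies a default timeout and does not follow redirects by
    # default, so both are set explicitly to mirror requests' behaviour.
    with httpx.stream(
        "GET", url, timeout=None, follow_redirects=True
    ) as r:
        # Fail early on non-2xx responses instead of writing an error page.
        r.raise_for_status()
        with open(filename, "wb") as f:
            # iter_bytes() is the httpx counterpart of requests' iter_content().
            for chunk in r.iter_bytes(chunk_size=chunk_size):
                f.write(chunk)
    return filename


if __name__ == "__main__":
    # Placeholder URL; substitute a real file location.
    fetch_to_file("https://example.com/archive.tar.gz", "archive.tar.gz")

One difference worth keeping in mind when reading the fallback branches kept above: as far as I know, httpx responses do not expose a requests-style r.raw file object, so streamed bodies are normally consumed through iter_bytes() rather than shutil.copyfileobj.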