Replace python dependency library “requests” with “httpx” (#47660) (#54529)

* replace requests with httpx * set timeout=3 * replace requests with httpx * replace requests with httpx * test * replace requests with httpx * test * replace requests with httpx * replace requests with httpx * modify paddle_build.sh * fix bug

Replace python dependency library “requests” with “httpx” (#47660) (#54529)
* replace requests with httpx * set timeout=3 * replace requests with httpx * replace requests with httpx * test * replace requests with httpx * test * replace requests with httpx * replace requests with httpx * modify paddle_build.sh * fix bug
ee5430fb · risemeup1 · GitHub · 8c415496 · ee5430fb · ee5430fb
9 changed files
--- a/paddle/scripts/paddle_build.sh
+++ b/paddle/scripts/paddle_build.sh
@@ -907,7 +907,7 @@ function get_precision_ut_mac() {
    UT_list=$(ctest -N | awk -F ': ' '{print $2}' | sed '/^$/d' | sed '$d')
    precision_cases=""
    if [ ${PRECISION_TEST:-OFF} == "ON" ]; then
-        python3.7 $PADDLE_ROOT/tools/get_pr_ut.py
+        python $PADDLE_ROOT/tools/get_pr_ut.py
        if [[ -f "ut_list" ]]; then
            echo "PREC length: "`wc -l ut_list`
            precision_cases=`cat ut_list`
@@ -1395,7 +1395,7 @@ set -x
        precison_cases=""
        bash $PADDLE_ROOT/tools/check_added_ut.sh
        if [ ${PRECISION_TEST:-OFF} == "ON" ]; then
-            python3.7 $PADDLE_ROOT/tools/get_pr_ut.py
+            python $PADDLE_ROOT/tools/get_pr_ut.py
            if [[ -f "ut_list" ]]; then
                set +x
                echo "PREC length: "`wc -l ut_list`
@@ -2364,7 +2364,7 @@ set -x
        check_approvals_of_unittest 2
        ctest -N | awk -F ': ' '{print $2}' | sed '/^$/d' | sed '$d' > ${PADDLE_ROOT}/build/all_ut_list
        if [ ${PRECISION_TEST:-OFF} == "ON" ]; then
-            python3.7 $PADDLE_ROOT/tools/get_pr_ut.py
+            python $PADDLE_ROOT/tools/get_pr_ut.py
        fi
        if [ -a "$PADDLE_ROOT/duplicate_ut" ];then
            duplicate_uts=$(cat $PADDLE_ROOT/duplicate_ut|sed -e 's/\r//g')

--- a/python/paddle/dataset/common.py
+++ b/python/paddle/dataset/common.py
@@ -22,7 +22,7 @@ import shutil
 import sys
 import tempfile
-import requests
+import httpx
 import paddle
 import paddle.dataset
@@ -100,28 +100,32 @@ def download(url, module_name, md5sum, save_name=None):
        )
        sys.stderr.write("Begin to download\n")
        try:
-            r = requests.get(url, stream=True)
+            # (risemeup1):use httpx to replace requests
-            total_length = r.headers.get('content-length')
+            with httpx.stream(
+                "GET", url, timeout=None, follow_redirects=True
-            if total_length is None:
+            ) as r:
-                with open(filename, 'wb') as f:
+                total_length = r.headers.get('content-length')
-                    shutil.copyfileobj(r.raw, f)
+                if total_length is None:
-            else:
+                    with open(filename, 'wb') as f:
-                with open(filename, 'wb') as f:
+                        shutil.copyfileobj(r.raw, f)
-                    chunk_size = 4096
+                else:
-                    total_length = int(total_length)
+                    with open(filename, 'wb') as f:
-                    total_iter = total_length / chunk_size + 1
+                        chunk_size = 4096
-                    log_interval = total_iter // 20 if total_iter > 20 else 1
+                        total_length = int(total_length)
-                    log_index = 0
+                        total_iter = total_length / chunk_size + 1
-                    bar = paddle.hapi.progressbar.ProgressBar(
+                        log_interval = (
-                        total_iter, name='item'
+                            total_iter // 20 if total_iter > 20 else 1
-                    )
+                        )
-                    for data in r.iter_content(chunk_size=chunk_size):
+                        log_index = 0
-                        f.write(data)
+                        bar = paddle.hapi.progressbar.ProgressBar(
-                        log_index += 1
+                            total_iter, name='item'
-                        bar.update(log_index, {})
+                        )
-                        if log_index % log_interval == 0:
+                        for data in r.iter_bytes(chunk_size=chunk_size):
-                            bar.update(log_index)
+                            f.write(data)
+                            log_index += 1
+                            bar.update(log_index, {})
+                            if log_index % log_interval == 0:
+                                bar.update(log_index)
        except Exception as e:
            # re-try

--- a/python/paddle/distributed/launch/utils/kv_client.py
+++ b/python/paddle/distributed/launch/utils/kv_client.py
@@ -14,7 +14,7 @@
 import time
-import requests
+import httpx
 class KVClient:
@@ -27,7 +27,7 @@ class KVClient:
        key = key if key.startswith('/') else f"/{key}"
        u = f"{self.endpoint}{key}"
        try:
-            r = requests.post(u, data=value, timeout=3)
+            r = httpx.post(u, data=value, timeout=None, follow_redirects=True)
            if r.status_code == 200:
                return True
            else:
@@ -39,7 +39,7 @@ class KVClient:
        key = key if key.startswith('/') else f"/{key}"
        u = f"{self.endpoint}{key}"
        try:
-            r = requests.get(u, timeout=3)
+            r = httpx.get(u, timeout=None, follow_redirects=True)
            if r.status_code == 200:
                ret = r.json()
                return ret.get(key, '')
@@ -52,7 +52,7 @@ class KVClient:
        key = key if key.startswith('/') else f"/{key}"
        u = f"{self.endpoint}{key}"
        try:
-            r = requests.get(u, timeout=3)
+            r = httpx.get(u, timeout=None, follow_redirects=True)
            if r.status_code == 200:
                return r.json()
        except:
@@ -62,7 +62,7 @@ class KVClient:
        key = key if key.startswith('/') else f"/{key}"
        u = f"{self.endpoint}{key}"
        try:
-            r = requests.delete(u, timeout=3)
+            r = httpx.delete(u, timeout=None, follow_redirects=True)
            if r.status_code == 200:
                return True
            else:

--- a/python/paddle/utils/download.py
+++ b/python/paddle/utils/download.py
@@ -22,7 +22,7 @@ import tarfile
 import time
 import zipfile
-import requests
+import httpx
 try:
    from tqdm import tqdm
@@ -167,7 +167,30 @@ def _get_download(url, fullname):
    # using requests.get method
    fname = osp.basename(fullname)
    try:
-        req = requests.get(url, stream=True)
+        with httpx.stream(
+            "GET", url, timeout=None, follow_redirects=True
+        ) as req:
+            if req.status_code != 200:
+                raise RuntimeError(
+                    "Downloading from {} failed with code "
+                    "{}!".format(url, req.status_code)
+                )
+            tmp_fullname = fullname + "_tmp"
+            total_size = req.headers.get('content-length')
+            with open(tmp_fullname, 'wb') as f:
+                if total_size:
+                    with tqdm(total=(int(total_size) + 1023) // 1024) as pbar:
+                        for chunk in req.iter_bytes(chunk_size=1024):
+                            f.write(chunk)
+                            pbar.update(1)
+                else:
+                    for chunk in req.iter_bytes(chunk_size=1024):
+                        if chunk:
+                            f.write(chunk)
+            shutil.move(tmp_fullname, fullname)
+            return fullname
    except Exception as e:  # requests.exceptions.ConnectionError
        logger.info(
            "Downloading {} from {} failed with exception {}".format(
@@ -176,31 +199,6 @@ def _get_download(url, fullname):
        )
        return False
-    if req.status_code != 200:
-        raise RuntimeError(
-            "Downloading from {} failed with code "
-            "{}!".format(url, req.status_code)
-        )
-    # For protecting download interrupted, download to
-    # tmp_fullname firstly, move tmp_fullname to fullname
-    # after download finished
-    tmp_fullname = fullname + "_tmp"
-    total_size = req.headers.get('content-length')
-    with open(tmp_fullname, 'wb') as f:
-        if total_size:
-            with tqdm(total=(int(total_size) + 1023) // 1024) as pbar:
-                for chunk in req.iter_content(chunk_size=1024):
-                    f.write(chunk)
-                    pbar.update(1)
-        else:
-            for chunk in req.iter_content(chunk_size=1024):
-                if chunk:
-                    f.write(chunk)
-    shutil.move(tmp_fullname, fullname)
-    return fullname
 def _wget_download(url, fullname):
    # using wget to download url
@@ -255,6 +253,7 @@ def _download(url, path, md5sum=None, method='get'):
    logger.info(f"Downloading {fname} from {url}")
    while not (osp.exists(fullname) and _md5check(fullname, md5sum)):
+        logger.info(f"md5check {fullname} and {md5sum}")
        if retry_cnt < DOWNLOAD_RETRY_LIMIT:
            retry_cnt += 1
        else:

--- a/python/requirements.txt
+++ b/python/requirements.txt
-requests>=2.20.0
+httpx
 numpy>=1.13
 protobuf>=3.20.2 ; platform_system != "Windows"
 protobuf>=3.1.0, <=3.20.2 ; platform_system == "Windows"

--- a/test/quantization/test_post_training_quantization_mnist.py
+++ b/test/quantization/test_post_training_quantization_mnist.py
@@ -64,7 +64,7 @@ class TestPostTrainingQuantization(unittest.TestCase):
    def download(self, url, dirname, md5sum, save_name=None):
        import shutil
-        import requests
+        import httpx
        filename = os.path.join(
            dirname, url.split('/')[-1] if save_name is None else save_name
@@ -91,30 +91,30 @@ class TestPostTrainingQuantization(unittest.TestCase):
            )
            sys.stderr.write("Begin to download\n")
            try:
-                r = requests.get(url, stream=True)
+                with httpx.stream("GET", url) as r:
-                total_length = r.headers.get('content-length')
+                    total_length = r.headers.get('content-length')
-                if total_length is None:
+                    if total_length is None:
-                    with open(filename, 'wb') as f:
+                        with open(filename, 'wb') as f:
-                        shutil.copyfileobj(r.raw, f)
+                            shutil.copyfileobj(r.raw, f)
-                else:
+                    else:
-                    with open(filename, 'wb') as f:
+                        with open(filename, 'wb') as f:
-                        chunk_size = 4096
+                            chunk_size = 4096
-                        total_length = int(total_length)
+                            total_length = int(total_length)
-                        total_iter = total_length / chunk_size + 1
+                            total_iter = total_length / chunk_size + 1
-                        log_interval = (
+                            log_interval = (
-                            total_iter // 20 if total_iter > 20 else 1
+                                total_iter // 20 if total_iter > 20 else 1
-                        )
+                            )
-                        log_index = 0
+                            log_index = 0
-                        bar = paddle.hapi.progressbar.ProgressBar(
+                            bar = paddle.hapi.progressbar.ProgressBar(
-                            total_iter, name='item'
+                                total_iter, name='item'
-                        )
+                            )
-                        for data in r.iter_content(chunk_size=chunk_size):
+                            for data in r.iter_bytes(chunk_size=chunk_size):
-                            f.write(data)
+                                f.write(data)
-                            log_index += 1
+                                log_index += 1
-                            bar.update(log_index, {})
+                                bar.update(log_index, {})
-                            if log_index % log_interval == 0:
+                                if log_index % log_interval == 0:
-                                bar.update(log_index)
+                                    bar.update(log_index)
            except Exception as e:
                # re-try

--- a/tools/CheckPRTemplate.py
+++ b/tools/CheckPRTemplate.py
@@ -16,7 +16,7 @@ import os
 import re
 import sys
-import requests
+import httpx
 PR_checkTemplate = ['Paddle']
@@ -73,7 +73,9 @@ def checkComments(url):
    headers = {
        'Authorization': 'token ' + GITHUB_API_TOKEN,
    }
-    response = requests.get(url, headers=headers).json()
+    response = httpx.get(
+        url, headers=headers, timeout=None, follow_redirects=True
+    ).json()
    return response
@@ -138,7 +140,9 @@ def get_a_pull(pull_id):
        'Authorization': 'token ' + GITHUB_API_TOKEN,
        'Accept': 'application/vnd.github+json',
    }
-    response = requests.request("GET", url, headers=headers, data=payload)
+    response = httpx.request(
+        "GET", url, headers=headers, data=payload, follow_redirects=True
+    )
    return response.json()

--- a/tools/get_pr_ut.py
+++ b/tools/get_pr_ut.py
@@ -23,7 +23,7 @@ import sys
 import time
 import urllib.request
-import requests
+import httpx
 from github import Github
 PADDLE_ROOT = os.getenv('PADDLE_ROOT', '/paddle/')
@@ -217,7 +217,7 @@ class PRChecker:
    def get_pr_diff_lines(self):
        file_to_diff_lines = {}
-        r = requests.get(self.pr.diff_url)
+        r = httpx.get(self.pr.diff_url, timeout=None, follow_redirects=True)
        data = r.text
        data = data.split('\n')
        ix = 0

--- a/tools/get_quick_disable_lt.py
+++ b/tools/get_quick_disable_lt.py
@@ -15,7 +15,7 @@
 import ssl
 import sys
-import requests
+import httpx
 import paddle
@@ -32,7 +32,7 @@ def download_file():
    if paddle.is_compiled_with_rocm():
        url = "https://sys-p0.bj.bcebos.com/prec/{}".format('disable_ut_rocm')
-    f = requests.get(url)
+    f = httpx.get(url, timeout=None, follow_redirects=True)
    data = f.text
    status_code = f.status_code
    if len(data.strip()) == 0 or status_code != 200: