From 705acb4c130489dbcaa3f88cfc34b7978377e22e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=A2=A6=E6=83=B3=E6=A9=A1=E7=9A=AE=E6=93=A6?= Date: Mon, 5 Dec 2022 16:15:30 +0800 Subject: [PATCH] =?UTF-8?q?=E5=8F=AF=E7=88=B1=E5=A5=B3=E4=BA=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...57\347\210\261\345\245\263\344\272\272.py" | 100 ++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 "\345\217\257\347\210\261\345\245\263\344\272\272\346\272\220\347\240\201/\345\217\257\347\210\261\345\245\263\344\272\272.py" diff --git "a/\345\217\257\347\210\261\345\245\263\344\272\272\346\272\220\347\240\201/\345\217\257\347\210\261\345\245\263\344\272\272.py" "b/\345\217\257\347\210\261\345\245\263\344\272\272\346\272\220\347\240\201/\345\217\257\347\210\261\345\245\263\344\272\272.py" new file mode 100644 index 0000000..ad58ec2 --- /dev/null +++ "b/\345\217\257\347\210\261\345\245\263\344\272\272\346\272\220\347\240\201/\345\217\257\347\210\261\345\245\263\344\272\272.py" @@ -0,0 +1,100 @@ +import requests +import re +import threading +import time + +headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"} + +# Detail-page image address URLs +detail_urls = [] + +mutex = threading.Lock() + + +# 循环获取URL +def get_detail_urls(url): + res = requests.get(url=url, headers=headers) + res.encoding = 'gb2312' + if res is not None: + + html = res.text # 读取页面源码 + # 对目标源码页数据进行裁剪 + # 获取 ul class = "g-gxlist-imgbox" 的数据 + # 该数据在标签