diff --git "a/\345\217\257\347\210\261\345\245\263\344\272\272\346\272\220\347\240\201/\345\217\257\347\210\261\345\245\263\344\272\272.py" "b/\345\217\257\347\210\261\345\245\263\344\272\272\346\272\220\347\240\201/\345\217\257\347\210\261\345\245\263\344\272\272.py" new file mode 100644 index 0000000000000000000000000000000000000000..ad58ec233f3ccae195168e9568f5832482efe148 --- /dev/null +++ "b/\345\217\257\347\210\261\345\245\263\344\272\272\346\272\220\347\240\201/\345\217\257\347\210\261\345\245\263\344\272\272.py" @@ -0,0 +1,100 @@ +import requests +import re +import threading +import time + +headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"} + +# URLs of the detail-page images +detail_urls = [] + +mutex = threading.Lock() + + +# Fetch URLs in a loop +def get_detail_urls(url): + res = requests.get(url=url, headers=headers) + res.encoding = 'gb2312' + if res is not None: + + html = res.text # read the page source + # trim the data of the target source page + # get the data of ul class = "g-gxlist-imgbox" + # that data is in the tag