import requests import re import time # 请求函数 def request_get(url, ret_type="text", timeout=5, encoding="GBK"): headers = { "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36" } res = requests.get(url=url, headers=headers, timeout=timeout) res.encoding = encoding if ret_type == "text": return res.text elif ret_type == "image": return res.content # 抓取函数 def main(): urls = [f"http://www.netbian.com/mei/index_{i}.htm" for i in range(2, 201)] url = "http://www.netbian.com/mei/index.htm" urls.insert(0, url) for url in urls: print("抓取列表页地址为:", url) text = request_get(url) format(text) # 解析函数 def format(text): origin_text = split_str(text, '