import requests import re import time # 声明 UA headers = { "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36" } # 存储异常路径,防止出现爬取失败情况 error_list = [] # 爬虫入口 def run(): url = "http://www.ultramanclub.com/allultraman/" try: # 网页访问速度慢,需要设置 timeout res = requests.get(url=url, headers=headers, timeout=10) res.encoding = "gb2312" html = res.text return get_detail_list(html) except Exception as e: print("请求异常", e) # 获取全部奥特曼详情页 def get_detail_list(html): start_index = '