import requests import re import os import time headers = { "user-agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36", "host": 'www.cosplay8.com' } def get_list(url): """ 获取全部详情页链接 """ all_list = [] res = requests.get(url, headers=headers) html = res.text pattern = re.compile('
  • ') all_list = pattern.findall(html) return all_list def save_img(path, title, first_img, index): try: # 请求图片 img_res = requests.get( f"http://www.cosplay8.com{first_img}", headers=headers) img_data = img_res.content with open(f"{path}/{title}_{index}.png", "wb+") as f: f.write(img_data) except Exception as e: print(e) def get_detail(url): res = requests.get(url=url, headers=headers) res.encoding = "utf-8" html = res.text # 拆解页码,保存第一张图片 size_pattern = re.compile('共(\d+)页: ') # title_pattern = re.compile('(.*?)-Cosplay中国') title_pattern = re.compile('(.*?)-Cosplay(中国|8)') first_img_pattern = re.compile("