"""Scraper for gaokao.cn: fetches province config and paged school lists
via the api.eol.cn Ajax endpoint."""

from urllib.parse import urlencode
import csv
import random
import requests
import traceback
from time import sleep
from lxml import etree
from fake_useragent import UserAgent
import json
import string

# Browser-like request headers so api.eol.cn accepts the Ajax call
# (Origin/Referer must point at www.gaokao.cn).
headers = {
    'authority': "api.eol.cn",
    'scheme': 'https',
    'Connection': 'keep-alive',
    'Accept': 'application/json, text/plain, */*',
    'X-Requested-With': 'XMLHttpRequest',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.42',
    'Origin': 'https://www.gaokao.cn',
    'Referer': 'https://www.gaokao.cn/',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'Content-Type': 'application/json',
}


def get_province():
    """Fetch the elective config and collect all provinces.

    Returns:
        list[dict]: province records sorted ascending by ``province_id``;
        empty list when the request does not return HTTP 200.
        (BUG FIX: the original built this list but never returned it.)
    """
    url = 'https://static-data.gaokao.cn/www/2.0/config/elective/config.json'
    response = requests.get(url, timeout=10)
    provinces = []
    if response.status_code == 200:
        # BUG FIX: the original decoded the body with 'unicode_escape',
        # which corrupts multi-byte UTF-8 (Chinese) characters; json()
        # decodes the payload correctly from its declared charset.
        data = response.json()['data']['province']
        print(data)
        # Provinces are bucketed under uppercase-letter keys
        # (pinyin initial); walk A-Z to flatten them in order.
        for letter in string.ascii_uppercase:
            if letter in data:
                provinces.extend(data[letter])
        provinces.sort(key=lambda x: x['province_id'])
        print(provinces)
    return provinces


def get_page(specal=None, keyword="", page=1, proviceId="", size=20, type=""):
    """Fetch one page of the school list from the api.eol.cn Ajax API.

    Args:
        specal: school category flag name; when truthy it is added to the
            query string as ``<specal>=1`` (placed first, matching the
            original parameter order).
        keyword: school-name search keyword.
        page: 1-based page number.
        proviceId: province id filter ("" = all provinces).
        size: results per page.
        type: school ownership type filter (name kept for backward
            compatibility even though it shadows the builtin).

    Returns:
        str: the raw JSON response body on success, or ``None`` after
        three failed attempts.
    """
    base_url = 'https://api.eol.cn/web/api/?'
    attempts = 3  # give up after 3 failed tries
    while attempts > 0:
        try:
            # Random 1-2s delay (fractional) so we don't hammer the API.
            sleep(random.uniform(1, 2))
            # Build the query dict once; the original duplicated it in
            # both branches differing only by the optional specal key.
            data = {}
            if specal:
                data[specal] = 1  # first, to preserve original URL param order
            data.update({
                "keyword": keyword,
                "page": page,
                "province_id": proviceId,
                "ranktype": "",
                "request_type": 1,
                "size": size,
                "type": type,
                "uri": 'apidata/api/gkv3/school/lists',
                "signsafe": "7d3ad7653039f90d198e9dad129022c6",
            })
            url = base_url + urlencode(data)
            print(url)
            # BUG FIX: added timeout= — the original could block forever
            # on a hung socket (requests never raises bare TimeoutError).
            response = requests.request("get", url, headers=headers, timeout=10)
            if response.status_code == 200:
                body = response.text  # renamed from 're' (shadowed the re module)
                print(body)
                return body
            # Non-200 falls through and counts as a failed attempt
            # (BUG FIX: the original looped forever on non-200 responses).
        except requests.ConnectionError as e:
            # BUG FIX: the original printed this but never decremented the
            # retry counter, so connection errors retried forever.
            print('Error', e.args)
        except Exception:
            traceback.print_exc()
        attempts -= 1
        if attempts == 0:
            print('请求3次均失败,放弃此url请求,检查请求条件')
            return
        print('请求失败,重新请求')


if __name__ == '__main__':
    # get_province()
    get_page()