单线程版本.py 587 字节
Newer Older
梦想橡皮擦's avatar
梦想橡皮擦 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
from requests_html import HTMLSession

session = HTMLSession()

page_size = int(input("请输入总页码:"))
for page in range(1, page_size + 1):

    world = session.get(f'http://www.world68.com/top.asp?t=5star&page={page}')
    world.encoding = 'gb2312'
    # world.html.encoding = "gb2312"
    # print(world.text)
    print("正在采集数据", world.url)
    title_a = world.html.find('dl>dt>a')
    for item in title_a:
        name = item.text
        url = item.attrs['href']
        with open('webs1.txt', "a+", encoding="utf-8") as f:
            f.write(f"{name},{url}\n")