Fri May 19 08:52:00 UTC 2023 inscode

上级 df352508
# Scrape one page of price listings from yz.yuzhuprice.com and parse the
# matched table rows into dictionaries.
#
# NOTE(review): the scraped page fused two pieces of diff residue into the
# first two import lines: a "print('欢迎来到 InsCode')" statement (apparently
# the line removed from the previous revision) and the diff marker
# "\ No newline at end of file". Both made the file unparseable, so they are
# left out of the executable code -- confirm against the repository.
import csv  # for saving scraped data as CSV, which Excel can open directly
import random  # for randomising the delay, to mimic a human visitor
import time  # for delaying requests; crawling too fast tends to get blocked
from time import sleep  # same purpose as above

import requests  # for sending requests to the website
from bs4 import BeautifulSoup
from lxml import etree

url = 'http://yz.yuzhuprice.com:8003/findPriceByName.jspx?page.curPage=1&priceName=%E7%BA%A2%E6%9C%A8%E7%B1%BB'
headers = {
    'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36",
}

# The timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get(url, headers=headers, timeout=10)

# Earlier BeautifulSoup experiment that listed every link on the page; kept
# for reference because it is the only use of the BeautifulSoup import.
# soup = BeautifulSoup(response.content, 'html.parser')
# for link in soup.find_all('a'):
#     print(link.get('href'))

html = response.text
parse = etree.HTML(html)

# Select every element whose id attribute is "173200".
# NOTE(review): presumably these are the <tr> rows of the price table --
# verify against the live page markup.
all_tr = parse.xpath('//*[@id="173200"]')

# Collect every parsed record; the original loop overwrote its own loop
# variable on each pass, so no record survived the iteration.
rows = []
for row in all_tr:
    # Each field is the concatenated, whitespace-stripped text of one <td>
    # child. `tr` is still bound to the most recent record after the loop,
    # as in the original script.
    tr = {
        'name': ''.join(row.xpath('./td[1]/text()')).strip(),
        'price': ''.join(row.xpath('./td[2]/text()')).strip(),
        'unit': ''.join(row.xpath('./td[3]/text()')).strip(),
        'supermaket': ''.join(row.xpath('./td[4]/text()')).strip(),
        'time': ''.join(row.xpath('./td[5]/text()')).strip(),
    }
    rows.append(tr)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册