Fri May 19 09:17:00 UTC 2023 inscode

Parent 63c061a3
@@ -7,20 +7,22 @@ from lxml import etree
import requests
from bs4 import BeautifulSoup

headers = {
    'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36",
}

for x in range(1, 3):  # replaces the previous single page.curPage=1 request: crawl result pages 1 and 2
    url = 'http://yz.yuzhuprice.com:8003/findPriceByName.jspx?page.curPage={}&priceName=%E7%BA%A2%E6%9C%A8%E7%B1%BB'.format(x)
    response = requests.get(url, headers=headers, timeout=10)
    # soup = BeautifulSoup(response.content, 'html.parser')
    # for link in soup.find_all('a'):
    #     print(link.get('href'))
    html = response.text
    parse = etree.HTML(html)
    all_tr = parse.xpath('//*[@id="173200"]')  # table row(s) with id 173200
    for tr in all_tr:
        tr = {
            'name': ''.join(tr.xpath('./td[1]/text()')).strip(),
            'price': ''.join(tr.xpath('./td[2]/text()')).strip(),
@@ -28,4 +30,6 @@
            'supermaket': ''.join(tr.xpath('./td[4]/text()')).strip(),
            'time': ''.join(tr.xpath('./td[5]/text()')).strip()
        }
        print(tr)
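
The pattern this commit introduces, formatting page.curPage into the query string and parsing each response with lxml, can be wrapped in a small reusable helper. The following is a minimal sketch under stated assumptions, not part of the commit: the function name fetch_price_rows, the raise_for_status() check, the broader //tr[td] row selector (instead of the fixed id 173200), and the positional cell mapping are illustrative.

# Minimal sketch of the paginated fetch-and-parse pattern from the commit.
# fetch_price_rows, the //tr[td] row selector, and the positional cell
# mapping are illustrative assumptions, not part of the original script.
import requests
from lxml import etree

BASE_URL = ('http://yz.yuzhuprice.com:8003/findPriceByName.jspx'
            '?page.curPage={}&priceName=%E7%BA%A2%E6%9C%A8%E7%B1%BB')
HEADERS = {
    'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36",
}


def fetch_price_rows(pages=range(1, 3)):
    """Yield one dict per table row across the requested result pages."""
    for page in pages:
        response = requests.get(BASE_URL.format(page), headers=HEADERS, timeout=10)
        response.raise_for_status()  # fail fast on HTTP errors
        parse = etree.HTML(response.text)
        for tr in parse.xpath('//tr[td]'):  # assumed row selector
            cells = [''.join(td.xpath('.//text()')).strip() for td in tr.xpath('./td')]
            if len(cells) >= 5:
                yield {
                    'name': cells[0],
                    'price': cells[1],
                    'supermaket': cells[3],  # key spelled as in the original script
                    'time': cells[4],
                }


if __name__ == '__main__':
    for row in fetch_price_rows():
        print(row)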