From c42d4b7bef8ce8eea0a96a49f4ef860578dc36b0 Mon Sep 17 00:00:00 2001 From: 64478506f791091b8f791bc8 <64478506f791091b8f791bc8@devide> Date: Fri, 19 May 2023 09:17:00 +0000 Subject: [PATCH] Fri May 19 09:17:00 UTC 2023 inscode --- main.py | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/main.py b/main.py index 250bfda..7d4d137 100644 --- a/main.py +++ b/main.py @@ -7,25 +7,29 @@ from lxml import etree import requests from bs4 import BeautifulSoup -url = 'http://yz.yuzhuprice.com:8003/findPriceByName.jspx?page.curPage=1&priceName=%E7%BA%A2%E6%9C%A8%E7%B1%BB' headers = { 'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36", } -response = requests.get(url,headers=headers,timeout=10) +for x in range(1,3): + url = 'http://yz.yuzhuprice.com:8003/findPriceByName.jspx?page.curPage={}&priceName=%E7%BA%A2%E6%9C%A8%E7%B1%BB'.format(x) -# soup = BeautifulSoup(response.content, 'html.parser') + response = requests.get(url,headers=headers,timeout=10) + + # soup = BeautifulSoup(response.content, 'html.parser') + + # for link in soup.find_all('a'): + # print(link.get('href')) + html = response.text + parse = etree.HTML(html) + all_tr = parse.xpath('//*[@id="173200"]') + for tr in all_tr: + tr = { + 'name': ''.join(tr.xpath('./td[1]/text()')).strip(), + 'price': ''.join(tr.xpath('./td[2]/text()')).strip(), + 'unit': ''.join(tr.xpath('./td[3]/text()')).strip(), + 'supermaket': ''.join(tr.xpath('./td[4]/text()')).strip(), + 'time': ''.join(tr.xpath('./td[5]/text()')).strip() + } + print(tr) -# for link in soup.find_all('a'): -# print(link.get('href')) -html = response.text -parse = etree.HTML(html) -all_tr = parse.xpath('//*[@id="173200"]') -for tr in all_tr: - tr = { - 'name': ''.join(tr.xpath('./td[1]/text()')).strip(), - 'price': ''.join(tr.xpath('./td[2]/text()')).strip(), - 'unit': ''.join(tr.xpath('./td[3]/text()')).strip(), - 'supermaket': ''.join(tr.xpath('./td[4]/text()')).strip(), - 'time': ''.join(tr.xpath('./td[5]/text()')).strip() - } -- GitLab