提交 3e313ffb 作者: 骆昊的技术专栏

更新了爬虫部分代码

上级 98dc244c
# -*- coding: utf-8 -*-
from io import StringIO
from urllib.parse import urlencode
import re
import scrapy
......@@ -26,6 +28,9 @@ class TaobaoSpider(scrapy.Spider):
item = GoodsItem()
item['price'] = goods.xpath('div[5]/div[2]/div[1]/div[1]/strong/text()').extract_first()
item['deal'] = goods.xpath('div[5]/div[2]/div[1]/div[2]/text()').extract_first()
item['title'] = goods.xpath('div[6]/div[2]/div[2]/a/text()').extract_first()
segments = goods.xpath('div[6]/div[2]/div[2]/a/text()').extract()
title = StringIO()
for segment in segments:
title.write(re.sub('\s', '', segment))
item['title'] = title.getvalue()
yield item
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册