更新了爬虫部分代码

3e313ffb · 骆昊的技术专栏 · 98dc244c · 3e313ffb
隐藏空白更改
内联并排

Showing with 7 additions and 2 deletions

Day66-75/code/image360/image360/spiders/taobao.py Day66-75/code/image360/image360/spiders/taobao.py +7 -2

未找到文件。
--- a/Day66-75/code/image360/image360/spiders/taobao.py
+++ b/Day66-75/code/image360/image360/spiders/taobao.py
 # -*- coding: utf-8 -*-
+from io import StringIO
 from urllib.parse import urlencode
+import re

 import scrapy

@@ -26,6 +28,9 @@ class TaobaoSpider(scrapy.Spider):
            item = GoodsItem()
            item['price'] = goods.xpath('div[5]/div[2]/div[1]/div[1]/strong/text()').extract_first()
            item['deal'] = goods.xpath('div[5]/div[2]/div[1]/div[2]/text()').extract_first()
-            item['title'] = goods.xpath('div[6]/div[2]/div[2]/a/text()').extract_first()
+            segments = goods.xpath('div[6]/div[2]/div[2]/a/text()').extract()
+            title = StringIO()
+            for segment in segments:
+                title.write(re.sub('\s', '', segment))
+            item['title'] = title.getvalue()
            yield item
-