From 3e313ffb91d4f130a21d9316497c0fe513504184 Mon Sep 17 00:00:00 2001
From: jackfrued
Date: Sat, 9 Jun 2018 10:36:52 +0800
Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E4=BA=86=E7=88=AC=E8=99=AB?=
 =?UTF-8?q?=E9=83=A8=E5=88=86=E4=BB=A3=E7=A0=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Day66-75/code/image360/image360/spiders/taobao.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/Day66-75/code/image360/image360/spiders/taobao.py b/Day66-75/code/image360/image360/spiders/taobao.py
index abc08ea..41213ac 100644
--- a/Day66-75/code/image360/image360/spiders/taobao.py
+++ b/Day66-75/code/image360/image360/spiders/taobao.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 from urllib.parse import urlencode
+import re
 
 import scrapy
 
@@ -26,6 +27,8 @@ class TaobaoSpider(scrapy.Spider):
             item = GoodsItem()
             item['price'] = goods.xpath('div[5]/div[2]/div[1]/div[1]/strong/text()').extract_first()
             item['deal'] = goods.xpath('div[5]/div[2]/div[1]/div[2]/text()').extract_first()
-            item['title'] = goods.xpath('div[6]/div[2]/div[2]/a/text()').extract_first()
+            # The link can hold several text nodes; concatenate all of them
+            # with every whitespace character removed to build the title.
+            segments = goods.xpath('div[6]/div[2]/div[2]/a/text()').extract()
+            item['title'] = ''.join(re.sub(r'\s', '', segment) for segment in segments)
             yield item
-
-- 
GitLab