Commit dc1f9ad2 authored by Paul Tremberth, committed by GitHub

Merge pull request #2307 from eLRuLL/genspider-no-www-fix

genspider: removing www. from starturl templates
@@ -5,9 +5,7 @@ import scrapy
 class $classname(scrapy.Spider):
     name = "$name"
     allowed_domains = ["$domain"]
-    start_urls = (
-        'http://www.$domain/',
-    )
+    start_urls = ['http://$domain/']

     def parse(self, response):
         pass
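For reference, a sketch of the file that `scrapy genspider example example.com` would produce from the updated basic template; the spider name `example` and domain `example.com` are illustrative values, not part of this commit:

# Sketch of the generated spider after this change; 'example' and
# 'example.com' are illustrative values substituted for $name/$domain.
import scrapy


class ExampleSpider(scrapy.Spider):
    name = "example"
    allowed_domains = ["example.com"]
    # The template no longer hard-codes a 'www.' prefix, so the start URL
    # uses the domain exactly as passed on the command line.
    start_urls = ['http://example.com/']

    def parse(self, response):
        pass

Since allowed_domains = ['example.com'] also covers subdomains such as www.example.com, offsite filtering behaves the same whether or not the prefix is present in the start URL.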
@@ -7,7 +7,7 @@ from scrapy.spiders import CrawlSpider, Rule
 class $classname(CrawlSpider):
     name = '$name'
     allowed_domains = ['$domain']
-    start_urls = ['http://www.$domain/']
+    start_urls = ['http://$domain/']

     rules = (
         Rule(LinkExtractor(allow=r'Items/'), callback='parse_item', follow=True),
......
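The crawl template is selected with genspider's -t flag. A sketch of what `scrapy genspider -t crawl example example.com` would now generate; the names are illustrative and the generated parse_item stub is abbreviated:

# Sketch of the crawl-template output after this change; names are
# illustrative and the generated parse_item stub is abbreviated here.
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule


class ExampleSpider(CrawlSpider):
    name = 'example'
    allowed_domains = ['example.com']
    start_urls = ['http://example.com/']  # no 'www.' prefix any more

    rules = (
        Rule(LinkExtractor(allow=r'Items/'), callback='parse_item', follow=True),
    )

    def parse_item(self, response):
        # Extraction logic goes here; the real stub returns an empty item.
        return {}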
@@ -5,7 +5,7 @@ from scrapy.spiders import CSVFeedSpider
 class $classname(CSVFeedSpider):
     name = '$name'
     allowed_domains = ['$domain']
-    start_urls = ['http://www.$domain/feed.csv']
+    start_urls = ['http://$domain/feed.csv']
     # headers = ['id', 'name', 'description', 'image_link']
     # delimiter = '\t'
......
@@ -5,7 +5,7 @@ from scrapy.spiders import XMLFeedSpider
 class $classname(XMLFeedSpider):
     name = '$name'
     allowed_domains = ['$domain']
-    start_urls = ['http://www.$domain/feed.xml']
+    start_urls = ['http://$domain/feed.xml']
     iterator = 'iternodes' # you can change this; see the docs
     itertag = 'item' # change it accordingly
......
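All four templates rely on string.Template-style $ placeholders. A minimal sketch of that substitution step, using an abbreviated copy of the basic template above; Scrapy's own template-rendering helper is not reproduced here:

# Minimal illustration of how the $classname/$name/$domain placeholders
# in these templates get filled; the template text is abbreviated.
from string import Template

BASIC_TEMPLATE = """\
import scrapy


class $classname(scrapy.Spider):
    name = "$name"
    allowed_domains = ["$domain"]
    start_urls = ['http://$domain/']
"""

print(Template(BASIC_TEMPLATE).substitute(
    classname='ExampleSpider', name='example', domain='example.com'))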