提交 30eb3273 编写于 作者: F feilong

改进代码

上级 94d7588d
......@@ -9,7 +9,7 @@ from scrapy.crawler import CrawlerProcess
from scrapy.settings import Settings
class StackOverflowTagSpider(scrapy.Spider):
name = "vscode_tags"
name = "stackoverflow_tags"
allowed_domains = ["visualstudio.com"]
start_urls = ['https://stackoverflow.com/tags/synonyms?page=1']
custom_settings = {
......
......@@ -7,29 +7,14 @@ import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy.settings import Settings
class CategoryItem(scrapy.Item):
name = scrapy.Field()
addr = scrapy.Field()
class TagItem(scrapy.Item):
name = scrapy.Field()
class VSCodeTagSpider(scrapy.Spider):
name = "vscode_tags"
allowed_domains = ["visualstudio.com"]
# start_urls = ['https://marketplace.visualstudio.com/search?target=VSCode&category=All%20categories&sortBy=Installs']
start_urls = ['https://stackoverflow.com/tags/synonyms?page=1']
start_urls = ['https://marketplace.visualstudio.com/search?target=VSCode&category=All%20categories&sortBy=Installs']
def parse(self, response):
print('todo')
class Categoryline(object):
def process_item(self, item, spider):
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0'}
req = urllib.request.Request(url=item['addr'],headers=headers)
res = urllib.request.urlopen(req)
def fetch():
settings = Settings()
process = CrawlerProcess()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册