diff --git a/src/tag_source/stackoverflow.py b/src/tag_source/stackoverflow.py
index 0c9ad50691ae1f1983cdad2a97e0f704d1e79f61..98ac2f21a33c987f3744d0d61b6015a87a55c2ef 100644
--- a/src/tag_source/stackoverflow.py
+++ b/src/tag_source/stackoverflow.py
@@ -9,7 +9,7 @@ from scrapy.crawler import CrawlerProcess
 from scrapy.settings import Settings
 
 class StackOverflowTagSpider(scrapy.Spider):
-    name = "vscode_tags"
+    name = "stackoverflow_tags"
     allowed_domains = ["visualstudio.com"]
     start_urls = ['https://stackoverflow.com/tags/synonyms?page=1']
     custom_settings = {
diff --git a/src/tag_source/vscode.py b/src/tag_source/vscode.py
index eafba58d8a7a87c6101ea445fef298fe1f391ffc..4f9bee3a5402f418d390652b9e1f32efcdb835d9 100644
--- a/src/tag_source/vscode.py
+++ b/src/tag_source/vscode.py
@@ -7,29 +7,14 @@ import scrapy
 from scrapy.crawler import CrawlerProcess
 from scrapy.settings import Settings
 
-class CategoryItem(scrapy.Item):
-    name = scrapy.Field()
-    addr = scrapy.Field()
-
-class TagItem(scrapy.Item):
-    name = scrapy.Field()
-
 class VSCodeTagSpider(scrapy.Spider):
     name = "vscode_tags"
     allowed_domains = ["visualstudio.com"]
-    # start_urls = ['https://marketplace.visualstudio.com/search?target=VSCode&category=All%20categories&sortBy=Installs']
-    start_urls = ['https://stackoverflow.com/tags/synonyms?page=1']
+    start_urls = ['https://marketplace.visualstudio.com/search?target=VSCode&category=All%20categories&sortBy=Installs']
 
     def parse(self, response):
         print('todo')
 
-class Categoryline(object):
-    def process_item(self, item, spider):
-        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0'}
-        req = urllib.request.Request(url=item['addr'],headers=headers)
-        res = urllib.request.urlopen(req)
-
-
 def fetch():
     settings = Settings()
     process = CrawlerProcess()