Commit f2095857 authored by 梦想橡皮擦

Spiders for 在行 (zaih.com), 超级产品经理 (imspm.com), and 优设网 (uisdc.com)

Parent 8690fb95
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
import scrapy
class ArticleItem(scrapy.Item):
# define the fields for your item here like:
# name = scrapy.Field()
    title = scrapy.Field()  # article title
    url = scrapy.Field()  # article URL
    author = scrapy.Field()  # author
# Define here the models for your spider middleware
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
from scrapy import signals
# useful for handling different item types with a single interface
from itemadapter import is_item, ItemAdapter
class MyProjectSpiderMiddleware:
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the spider middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_spider_input(self, response, spider):
# Called for each response that goes through the spider
# middleware and into the spider.
# Should return None or raise an exception.
return None
def process_spider_output(self, response, result, spider):
# Called with the results returned from the Spider, after
# it has processed the response.
# Must return an iterable of Request, or item objects.
for i in result:
yield i
def process_spider_exception(self, response, exception, spider):
# Called when a spider or process_spider_input() method
# (from other spider middleware) raises an exception.
# Should return either None or an iterable of Request or item objects.
pass
def process_start_requests(self, start_requests, spider):
# Called with the start requests of the spider, and works
# similarly to the process_spider_output() method, except
# that it doesn’t have a response associated.
# Must return only requests (not items).
for r in start_requests:
yield r
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)
class MyProjectDownloaderMiddleware:
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the downloader middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_request(self, request, spider):
# Called for each request that goes through the downloader
# middleware.
# Must either:
# - return None: continue processing this request
# - or return a Response object
# - or return a Request object
# - or raise IgnoreRequest: process_exception() methods of
# installed downloader middleware will be called
return None
def process_response(self, request, response, spider):
# Called with the response returned from the downloader.
# Must either:
# - return a Response object
# - return a Request object
# - or raise IgnoreRequest
return response
def process_exception(self, request, exception, spider):
# Called when a download handler or a process_request()
# (from other downloader middleware) raises an exception.
# Must either:
# - return None: continue processing this exception
# - return a Response object: stops process_exception() chain
# - return a Request object: stops process_exception() chain
pass
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# useful for handling different item types with a single interface
from itemadapter import ItemAdapter
from scrapy.exceptions import DropItem
class TitlePipeline:
    def process_item(self, item, spider):  # strip whitespace from the title
        if item["title"]:
            item["title"] = item["title"].strip()
            return item
        else:
            # DropItem is an exception; raising it tells Scrapy to discard the item
            raise DropItem("invalid data: empty title")
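A minimal usage sketch of the pipeline above, called directly outside of a crawl with hand-made dict items; the sample values and the direct call are assumptions for illustration only:

from scrapy.exceptions import DropItem
from my_project.pipelines import TitlePipeline

pipeline = TitlePipeline()

# a title with surrounding whitespace is stripped and the item passes through
item = pipeline.process_item({"title": "  B端设计师要懂的信息架构  ", "url": "http://www.imspm.com/a", "author": "阿东"}, spider=None)
print(item["title"])  # -> "B端设计师要懂的信息架构"

# an empty title raises DropItem, so Scrapy discards the item
try:
    pipeline.process_item({"title": "", "url": "", "author": ""}, spider=None)
except DropItem as e:
    print("dropped:", e)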
# Scrapy settings for my_project project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# https://docs.scrapy.org/en/latest/topics/settings.html
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
BOT_NAME = 'my_project'
SPIDER_MODULES = ['my_project.spiders']
NEWSPIDER_MODULE = 'my_project.spiders'
# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'my_project (+http://www.yourdomain.com)'
# Obey robots.txt rules
ROBOTSTXT_OBEY = True
# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32
# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16
# Disable cookies (enabled by default)
#COOKIES_ENABLED = False
# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False
# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
# 'Accept-Language': 'en',
#}
# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
# 'my_project.middlewares.MyProjectSpiderMiddleware': 543,
#}
# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
# 'my_project.middlewares.MyProjectDownloaderMiddleware': 543,
#}
# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
# 'scrapy.extensions.telnet.TelnetConsole': None,
#}
# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
'my_project.pipelines.TitlePipeline': 300,
}
# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False
# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.
import scrapy
from my_project.items import ArticleItem
class PmSpider(scrapy.Spider):
name = 'pm'
allowed_domains = ['www.imspm.com']
start_urls = ['http://www.imspm.com/chanpin/']
def parse(self, response):
# print(response.text)
list_item = response.css('.list-item-default')
# print(list_item)
for i in list_item:
item = ArticleItem()
            title = i.css('.title::text').extract_first()  # get the text directly
            url = i.css('.a_block::attr(href)').extract_first()  # get the attribute value
            author = i.css('.author::text').extract_first()  # get the text directly
            # print(title, url, author)
            # assign values to the item
item['title'] = title
item['url'] = url
item['author'] = author
yield item
        next_page = response.css('.nav a:nth-last-child(2)::attr(href)').extract_first()  # get the next-page link
        # print(next_page)
        # issue another request, stopping once no next-page link is found
        if next_page:
            yield scrapy.Request(url=next_page, callback=self.parse)
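As a hedged aside, the pm spider above can be run with the usual scrapy crawl pm -o articles.csv command; a roughly equivalent programmatic sketch is shown below (the file name and the FEEDS override are assumptions, and FEEDS needs Scrapy 2.1 or newer):

# run_pm.py -- hypothetical helper script at the project root
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

settings = get_project_settings()                            # loads my_project/settings.py
settings.set("FEEDS", {"articles.csv": {"format": "csv"}})   # export yielded items as CSV
process = CrawlerProcess(settings)
process.crawl("pm")                                          # the spider registered as name = 'pm'
process.start()                                              # blocks until the crawl finishes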
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.io/en/latest/deploy.html
[settings]
default = my_project.settings
[deploy]
#url = http://localhost:6800/
project = my_project
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.io/en/latest/deploy.html
[settings]
default = zaihang_spider.settings
[deploy]
#url = http://localhost:6800/
project = zaihang_spider
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
import scrapy
class ZaihangItem(scrapy.Item):
# define the fields for your item here like:
# name = scrapy.Field()
    name = scrapy.Field()  # name
    city = scrapy.Field()  # city
    industry = scrapy.Field()  # industry
    price = scrapy.Field()  # price
    chat_nums = scrapy.Field()  # number of chats
    score = scrapy.Field()  # rating
# Define here the models for your spider middleware
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
from scrapy import signals
# useful for handling different item types with a single interface
from itemadapter import is_item, ItemAdapter
class ZaihangSpiderSpiderMiddleware:
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the spider middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_spider_input(self, response, spider):
# Called for each response that goes through the spider
# middleware and into the spider.
# Should return None or raise an exception.
return None
def process_spider_output(self, response, result, spider):
# Called with the results returned from the Spider, after
# it has processed the response.
# Must return an iterable of Request, or item objects.
for i in result:
yield i
def process_spider_exception(self, response, exception, spider):
# Called when a spider or process_spider_input() method
# (from other spider middleware) raises an exception.
# Should return either None or an iterable of Request or item objects.
pass
def process_start_requests(self, start_requests, spider):
# Called with the start requests of the spider, and works
# similarly to the process_spider_output() method, except
# that it doesn’t have a response associated.
# Must return only requests (not items).
for r in start_requests:
yield r
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)
class ZaihangSpiderDownloaderMiddleware:
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the downloader middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_request(self, request, spider):
# Called for each request that goes through the downloader
# middleware.
# Must either:
# - return None: continue processing this request
# - or return a Response object
# - or return a Request object
# - or raise IgnoreRequest: process_exception() methods of
# installed downloader middleware will be called
return None
def process_response(self, request, response, spider):
# Called with the response returned from the downloader.
# Must either:
# - return a Response object
# - return a Request object
# - or raise IgnoreRequest
return response
def process_exception(self, request, exception, spider):
# Called when a download handler or a process_request()
# (from other downloader middleware) raises an exception.
# Must either:
# - return None: continue processing this exception
# - return a Response object: stops process_exception() chain
# - return a Request object: stops process_exception() chain
pass
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# useful for handling different item types with a single interface
from itemadapter import ItemAdapter
import pymysql
class ZaihangMySQLPipeline:
def __init__(self, host, port, user, password, db):
self.host = host
self.port = port
self.user = user
self.password = password
self.db = db
self.conn = None
self.cursor = None
@classmethod
def from_crawler(cls, crawler):
return cls(
host=crawler.settings.get('HOST'),
port=crawler.settings.get('PORT'),
user=crawler.settings.get('USER'),
password=crawler.settings.get('PASSWORD'),
db=crawler.settings.get('DB')
)
def open_spider(self, spider):
self.conn = pymysql.connect(host=self.host, port=self.port, user=self.user, password=self.password, db=self.db)
def process_item(self, item, spider):
print(item)
name = item["name"]
city = item["city"]
industry = item["industry"]
price = item["price"]
chat_nums = item["chat_nums"]
score = item["score"]
sql = "insert into users(name,city,industry,price,chat_nums,score) values ('%s','%s','%s',%.1f,%d,%.1f)" % (
name, city, industry, float(price), int(chat_nums), float(score))
print(sql)
self.cursor = self.conn.cursor() # 设置游标
try:
self.cursor.execute(sql) # 执行 sql
self.conn.commit()
except Exception as e:
print(e)
self.conn.rollback()
return item
def close_spider(self, spider):
self.cursor.close()
self.conn.close()
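The pipeline above assumes a users table already exists in the zaihang database; a minimal schema sketch created through pymysql follows, where the column types are guesses inferred from the INSERT statement and are not part of the commit:

import pymysql

# connection values mirror HOST/PORT/USER/PASSWORD/DB in zaihang_spider/settings.py
conn = pymysql.connect(host="127.0.0.1", port=3306, user="root", password="123456", db="zaihang")
with conn.cursor() as cursor:
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS users (
            id INT AUTO_INCREMENT PRIMARY KEY,
            name VARCHAR(255),
            city VARCHAR(255),
            industry VARCHAR(255),
            price DECIMAL(10, 1),
            chat_nums INT,
            score DECIMAL(3, 1)
        ) DEFAULT CHARSET = utf8mb4
    """)
conn.commit()
conn.close()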
# Scrapy settings for zaihang_spider project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# https://docs.scrapy.org/en/latest/topics/settings.html
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
BOT_NAME = 'zaihang_spider'
SPIDER_MODULES = ['zaihang_spider.spiders']
NEWSPIDER_MODULE = 'zaihang_spider.spiders'
HOST = "127.0.0.1"
PORT = 3306
USER = "root"
PASSWORD = "123456"
DB = "zaihang"
# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'zaihang_spider (+http://www.yourdomain.com)'
# Obey robots.txt rules
ROBOTSTXT_OBEY = True
# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32
# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16
# Disable cookies (enabled by default)
#COOKIES_ENABLED = False
# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False
# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
# 'Accept-Language': 'en',
#}
# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
# 'zaihang_spider.middlewares.ZaihangSpiderSpiderMiddleware': 543,
#}
# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
# 'zaihang_spider.middlewares.ZaihangSpiderDownloaderMiddleware': 543,
#}
# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
# 'scrapy.extensions.telnet.TelnetConsole': None,
#}
# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
'zaihang_spider.pipelines.ZaihangMySQLPipeline': 300,
}
# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False
# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.
import scrapy
from zaihang_spider.items import ZaihangItem
class ZhSpider(scrapy.Spider):
name = 'zh'
allowed_domains = ['www.zaih.com']
    page = 1  # starting page number
    url_format = 'https://www.zaih.com/falcon/mentors?first_tag_id=479&first_tag_name=%E5%BF%83%E7%90%86&page={}'  # URL template
start_urls = [url_format.format(page)]
def parse(self, response):
empty = response.css("section.empty")
if len(empty) > 0:
return
mentors = response.css(".mentor-board a")
for m in mentors:
item = ZaihangItem()
name = m.css(".mentor-card__name::text").extract_first()
city = m.css(".mentor-card__location::text").extract_first()
industry = m.css(".mentor-card__title::text").extract_first()
price = self.replace_space(m.css(".mentor-card__price::text").extract_first())
chat_nums = self.replace_space(m.css(".mentor-card__number::text").extract()[0])
score = self.replace_space(m.css(".mentor-card__number::text").extract()[1])
            # fill the formatted data into the item
item["name"] = name
item["city"] = city
item["industry"] = industry
item["price"] = price
item["chat_nums"] = chat_nums
item["score"] = score
yield item
        # issue a request for the next page
        self.page += 1
        next_url = self.url_format.format(self.page)
yield scrapy.Request(url=next_url, callback=self.parse)
def replace_space(self, in_str):
in_str = in_str.replace("\n", "").replace("\r", "").replace("¥", "")
return in_str.strip()
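A small illustration of what replace_space does to the raw card text; the sample strings are assumptions about how zaih.com renders prices and ratings:

spider = ZhSpider()
print(spider.replace_space("\n      ¥499\n    "))  # -> "499"  (newlines and the currency sign removed)
print(spider.replace_space("\r\n 4.9 "))           # -> "4.9"  (carriage returns stripped, then trimmed)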
author,tag,title
阿东,B端,8000+干货!B端设计师要懂的信息架构
陈子木,3D样机,第一波!2021年10月精选实用设计干货合集
张爱国,LOL,人气顶流的英雄联盟手游,有哪些值得关注的设计细节?
邓海贝,对比色,配色没有冲击力?来试试对比色!
土拨鼠,ui设计,这3个细节创新,大厂设计师都不一定全知道!
Reman刘斌,平面设计,民国时期的平面设计到底有多潮?穿着旗袍开机车!
米米米米米米米米,,甲方给的图片不清晰?收下这个超好用的免费无损放大软件 Topaz Gigapixel AI
土拨鼠,抄袭,设计该如何看待「抄袭」这件事?我总结了5个方面!
土拨鼠,内容优先,用一篇文章,帮你了解提高用户体验的「内容策划研究方法」
土拨鼠,,她做完这个暗黑设计后,为什么选择了离职?
陈子木,免费图库,壹周速读:假期归来先存好这些干货!
张彭彭,PPT设计,大厂进阶案例!腾讯D10晋升失败的复盘总结
酷家乐UED,业务分析,需求太碎?聊聊设计师如何在小业务中提炼价值
Ohh,交互设计,对比多个大厂产品后,总结了竖屏播放进度条的设计思考
小果,交互设计,如何实现产品的「啊哈时刻」?从4个方面展开详聊
郝小七,HMI,HMI设计必看!入局车载设计的最优路径+入门指南
土拨鼠,B端,8000字干货!B端用户「帮助体系」搭建指南
土拨鼠,医疗,腾讯实战案例!联动医疗下的用户研究
土拨鼠,Decentrafile,上传文件永不过期!免费的匿名去中心化云端空间「Decentrafile 」
ZoeYZ,交互设计,不止画图标!5 个金刚区的交互设计思考
土拨鼠,2021-2022设计趋势,腾讯 ISUX 出品!2021-2022设计趋势报告:日系潮玩篇
土拨鼠,3D渲染模板,2021年全网最新最好的样机资源被我找到了!免费打包!
S 设计研究所,产品视觉设计,从学校到五年职场,我是如何理解插画设计的
土拨鼠,Image Extractor,输入网址就能批量下载网站图片的在线神器「Image Extractor」
陈子木,干货,第四波!2021年9月精选实用设计干货合集
土拨鼠,平面设计,用一篇文章,帮你了解视觉冲击的最佳风格「达达主义」
百度MEUX,产品设计,大厂出品!如何通过游戏化设计助力直播互动?
土拨鼠,在线工具,收录超过 30 万个高质量免费图标的网站「PNG Repo」
思鱼耶,平面设计,文字太多画面没有层次感?优设标题黑主设计师的版式设计秘籍来了
土拨鼠,产品设计,6000+干货!资深总监的四条产品设计工作观(附私藏神器包)
author,tag,title
阿东,B端,新闻:8000+干货!B端设计师要懂的信息架构
陈子木,3D样机,新闻:第一波!2021年10月精选实用设计干货合集
张爱国,LOL,新闻:人气顶流的英雄联盟手游,有哪些值得关注的设计细节?
邓海贝,对比色,新闻:配色没有冲击力?来试试对比色!
土拨鼠,ui设计,新闻:这3个细节创新,大厂设计师都不一定全知道!
Reman刘斌,平面设计,新闻:民国时期的平面设计到底有多潮?穿着旗袍开机车!
米米米米米米米米,,新闻:甲方给的图片不清晰?收下这个超好用的免费无损放大软件 Topaz Gigapixel AI
土拨鼠,抄袭,新闻:设计该如何看待「抄袭」这件事?我总结了5个方面!
土拨鼠,内容优先,新闻:用一篇文章,帮你了解提高用户体验的「内容策划研究方法」
土拨鼠,,新闻:她做完这个暗黑设计后,为什么选择了离职?
陈子木,免费图库,新闻:壹周速读:假期归来先存好这些干货!
张彭彭,PPT设计,新闻:大厂进阶案例!腾讯D10晋升失败的复盘总结
酷家乐UED,业务分析,新闻:需求太碎?聊聊设计师如何在小业务中提炼价值
Ohh,交互设计,新闻:对比多个大厂产品后,总结了竖屏播放进度条的设计思考
小果,交互设计,新闻:如何实现产品的「啊哈时刻」?从4个方面展开详聊
郝小七,HMI,新闻:HMI设计必看!入局车载设计的最优路径+入门指南
土拨鼠,B端,新闻:8000字干货!B端用户「帮助体系」搭建指南
土拨鼠,医疗,新闻:腾讯实战案例!联动医疗下的用户研究
土拨鼠,Decentrafile,新闻:上传文件永不过期!免费的匿名去中心化云端空间「Decentrafile 」
ZoeYZ,交互设计,新闻:不止画图标!5 个金刚区的交互设计思考
土拨鼠,2021-2022设计趋势,新闻:腾讯 ISUX 出品!2021-2022设计趋势报告:日系潮玩篇
土拨鼠,3D渲染模板,新闻:2021年全网最新最好的样机资源被我找到了!免费打包!
S 设计研究所,产品视觉设计,新闻:从学校到五年职场,我是如何理解插画设计的
土拨鼠,Image Extractor,新闻:输入网址就能批量下载网站图片的在线神器「Image Extractor」
陈子木,干货,新闻:第四波!2021年9月精选实用设计干货合集
土拨鼠,平面设计,新闻:用一篇文章,帮你了解视觉冲击的最佳风格「达达主义」
百度MEUX,产品设计,新闻:大厂出品!如何通过游戏化设计助力直播互动?
土拨鼠,在线工具,新闻:收录超过 30 万个高质量免费图标的网站「PNG Repo」
思鱼耶,平面设计,新闻:文字太多画面没有层次感?优设标题黑主设计师的版式设计秘籍来了
土拨鼠,产品设计,新闻:6000+干货!资深总监的四条产品设计工作观(附私藏神器包)
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.io/en/latest/deploy.html
[settings]
default = uisdc.settings
[deploy]
#url = http://localhost:6800/
project = uisdc
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
from scrapy.item import Item, Field
from scrapy.loader.processors import MapCompose, TakeFirst
def ext(value):
return "新闻:" + value
class UisdcItem(Item):
# define the fields for your item here like:
title = Field(
input_processor=MapCompose(ext),
output_processor=TakeFirst()
)
author = Field(output_processor=TakeFirst())
tag = Field(output_processor=TakeFirst())
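A minimal sketch of how the MapCompose(ext) input processor and the TakeFirst() output processor behave, run against an in-memory selector instead of a live response; the HTML snippet is a hypothetical stand-in for the real uisdc.com markup:

from scrapy.loader import ItemLoader
from scrapy.selector import Selector
from uisdc.items import UisdcItem

html = ('<div><h2 class="item-title"><a>B端设计师要懂的信息架构</a></h2>'
        '<h3 class="meta-name">阿东</h3><div class="meta-tag"><a>B端</a></div></div>')
loader = ItemLoader(item=UisdcItem(), selector=Selector(text=html))
loader.add_xpath('title', "//h2[@class='item-title']/a/text()")
loader.add_xpath('author', "//h3[@class='meta-name']/text()")
loader.add_xpath('tag', "//div[@class='meta-tag']/a/text()")
print(loader.load_item())
# title comes out as '新闻:B端设计师要懂的信息架构' -- MapCompose(ext) prefixes each extracted value,
# and TakeFirst() collapses the list of extracted strings to a single value per field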
# Define here the models for your spider middleware
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
from scrapy import signals
# useful for handling different item types with a single interface
from itemadapter import is_item, ItemAdapter
class UisdcSpiderMiddleware:
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the spider middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_spider_input(self, response, spider):
# Called for each response that goes through the spider
# middleware and into the spider.
# Should return None or raise an exception.
return None
def process_spider_output(self, response, result, spider):
# Called with the results returned from the Spider, after
# it has processed the response.
# Must return an iterable of Request, or item objects.
for i in result:
yield i
def process_spider_exception(self, response, exception, spider):
# Called when a spider or process_spider_input() method
# (from other spider middleware) raises an exception.
# Should return either None or an iterable of Request or item objects.
pass
def process_start_requests(self, start_requests, spider):
# Called with the start requests of the spider, and works
# similarly to the process_spider_output() method, except
# that it doesn’t have a response associated.
# Must return only requests (not items).
for r in start_requests:
yield r
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)
class UisdcDownloaderMiddleware:
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the downloader middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_request(self, request, spider):
# Called for each request that goes through the downloader
# middleware.
# Must either:
# - return None: continue processing this request
# - or return a Response object
# - or return a Request object
# - or raise IgnoreRequest: process_exception() methods of
# installed downloader middleware will be called
return None
def process_response(self, request, response, spider):
# Called with the response returned from the downloader.
# Must either:
# - return a Response object
# - return a Request object
# - or raise IgnoreRequest
return response
def process_exception(self, request, exception, spider):
# Called when a download handler or a process_request()
# (from other downloader middleware) raises an exception.
# Must either:
# - return None: continue processing this exception
# - return a Response object: stops process_exception() chain
# - return a Request object: stops process_exception() chain
pass
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# useful for handling different item types with a single interface
from itemadapter import ItemAdapter
class UisdcPipeline:
def process_item(self, item, spider):
return item
# Scrapy settings for uisdc project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# https://docs.scrapy.org/en/latest/topics/settings.html
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
BOT_NAME = 'uisdc'
SPIDER_MODULES = ['uisdc.spiders']
NEWSPIDER_MODULE = 'uisdc.spiders'
# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'uisdc (+http://www.yourdomain.com)'
# Obey robots.txt rules
ROBOTSTXT_OBEY = True
# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32
# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16
# Disable cookies (enabled by default)
#COOKIES_ENABLED = False
# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False
# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
# 'Accept-Language': 'en',
#}
# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
# 'uisdc.middlewares.UisdcSpiderMiddleware': 543,
#}
# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
# 'uisdc.middlewares.UisdcDownloaderMiddleware': 543,
#}
# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
# 'scrapy.extensions.telnet.TelnetConsole': None,
#}
# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
#ITEM_PIPELINES = {
# 'uisdc.pipelines.UisdcPipeline': 300,
#}
# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False
# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
# LOG_LEVEL = 'WARNING'
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.
import scrapy
from scrapy.loader import ItemLoader
from uisdc.items import UisdcItem
class UiSpider(scrapy.Spider):
name = 'ui'
allowed_domains = ['www.uisdc.com']
start_urls = ['https://www.uisdc.com/archives']
custom_settings = {
"ROBOTSTXT_OBEY": False
}
def parse(self, response):
items = response.xpath('//div[@id="archive_list"]/div/div[1]/div[1]/div[contains(@class,"item-article")]')
for i in items:
            loader = ItemLoader(item=UisdcItem(), selector=i)
            loader.add_xpath('title', ".//h2[@class='item-title']/a/text()")
            loader.add_xpath('author', ".//h3[@class='meta-name']/text()")
            loader.add_xpath('tag', ".//div[@class='meta-tag']/a/text()")
            ret = loader.load_item()
# print(ret)
yield ret