diff --git "a/\346\241\210\344\276\21330/mySpider/__init__.py" "b/\346\241\210\344\276\21330/mySpider/__init__.py" new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git "a/\346\241\210\344\276\21330/mySpider/__pycache__/__init__.cpython-38.pyc" "b/\346\241\210\344\276\21330/mySpider/__pycache__/__init__.cpython-38.pyc" new file mode 100644 index 0000000000000000000000000000000000000000..6f1757592986d6495f75bd57dd56c7905a501ffb Binary files /dev/null and "b/\346\241\210\344\276\21330/mySpider/__pycache__/__init__.cpython-38.pyc" differ diff --git "a/\346\241\210\344\276\21330/mySpider/__pycache__/items.cpython-38.pyc" "b/\346\241\210\344\276\21330/mySpider/__pycache__/items.cpython-38.pyc" new file mode 100644 index 0000000000000000000000000000000000000000..71ede66933d6551ec00f33c3a65991bd434bf973 Binary files /dev/null and "b/\346\241\210\344\276\21330/mySpider/__pycache__/items.cpython-38.pyc" differ diff --git "a/\346\241\210\344\276\21330/mySpider/__pycache__/pipelines.cpython-38.pyc" "b/\346\241\210\344\276\21330/mySpider/__pycache__/pipelines.cpython-38.pyc" new file mode 100644 index 0000000000000000000000000000000000000000..47bbe2a8f7a6cca256f8bf9507378d48c4e4c060 Binary files /dev/null and "b/\346\241\210\344\276\21330/mySpider/__pycache__/pipelines.cpython-38.pyc" differ diff --git "a/\346\241\210\344\276\21330/mySpider/__pycache__/settings.cpython-38.pyc" "b/\346\241\210\344\276\21330/mySpider/__pycache__/settings.cpython-38.pyc" new file mode 100644 index 0000000000000000000000000000000000000000..cfe1646ae608505bdeb6d3774bcd66f5dd774f0d Binary files /dev/null and "b/\346\241\210\344\276\21330/mySpider/__pycache__/settings.cpython-38.pyc" differ diff --git "a/\346\241\210\344\276\21330/mySpider/begin.py" "b/\346\241\210\344\276\21330/mySpider/begin.py" new file mode 100644 index 0000000000000000000000000000000000000000..5c2f67d2c1518163884ad90fcba56f84e169a147 --- /dev/null +++ "b/\346\241\210\344\276\21330/mySpider/begin.py" @@ -0,0 +1,2 @@ +from scrapy import cmdline +cmdline.execute(("scrapy crawl jianzhu").split()) \ No newline at end of file diff --git "a/\346\241\210\344\276\21330/mySpider/items.py" "b/\346\241\210\344\276\21330/mySpider/items.py" new file mode 100644 index 0000000000000000000000000000000000000000..139d9daee50e71fb5b11d9876670a9ae019d20e2 --- /dev/null +++ "b/\346\241\210\344\276\21330/mySpider/items.py" @@ -0,0 +1,17 @@ +# Define here the models for your scraped items +# +# See documentation in: +# https://docs.scrapy.org/en/latest/topics/items.html + +import scrapy + + +class MyspiderItem(scrapy.Item): + # define the fields for your item here like: + # name = scrapy.Field() + # 标题 + title = scrapy.Field() + # 发布人 + userName = scrapy.Field() + # 发布时间 + createTime = scrapy.Field() \ No newline at end of file diff --git "a/\346\241\210\344\276\21330/mySpider/middlewares.py" "b/\346\241\210\344\276\21330/mySpider/middlewares.py" new file mode 100644 index 0000000000000000000000000000000000000000..a8bffc5272818a2cab2804904f307f08254156f3 --- /dev/null +++ "b/\346\241\210\344\276\21330/mySpider/middlewares.py" @@ -0,0 +1,103 @@ +# Define here the models for your spider middleware +# +# See documentation in: +# https://docs.scrapy.org/en/latest/topics/spider-middleware.html + +from scrapy import signals + +# useful for handling different item types with a single interface +from itemadapter import is_item, ItemAdapter + + +class MyspiderSpiderMiddleware: + # Not all methods need to be defined. 
+    # scrapy acts as if the spider middleware does not modify the
+    # passed objects.
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        # This method is used by Scrapy to create your spiders.
+        s = cls()
+        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
+        return s
+
+    def process_spider_input(self, response, spider):
+        # Called for each response that goes through the spider
+        # middleware and into the spider.
+
+        # Should return None or raise an exception.
+        return None
+
+    def process_spider_output(self, response, result, spider):
+        # Called with the results returned from the Spider, after
+        # it has processed the response.
+
+        # Must return an iterable of Request, or item objects.
+        for i in result:
+            yield i
+
+    def process_spider_exception(self, response, exception, spider):
+        # Called when a spider or process_spider_input() method
+        # (from other spider middleware) raises an exception.
+
+        # Should return either None or an iterable of Request or item objects.
+        pass
+
+    def process_start_requests(self, start_requests, spider):
+        # Called with the start requests of the spider, and works
+        # similarly to the process_spider_output() method, except
+        # that it doesn’t have a response associated.
+
+        # Must return only requests (not items).
+        for r in start_requests:
+            yield r
+
+    def spider_opened(self, spider):
+        spider.logger.info('Spider opened: %s' % spider.name)
+
+
+class MyspiderDownloaderMiddleware:
+    # Not all methods need to be defined. If a method is not defined,
+    # scrapy acts as if the downloader middleware does not modify the
+    # passed objects.
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        # This method is used by Scrapy to create your spiders.
+        s = cls()
+        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
+        return s
+
+    def process_request(self, request, spider):
+        # Called for each request that goes through the downloader
+        # middleware.
+
+        # Must either:
+        # - return None: continue processing this request
+        # - or return a Response object
+        # - or return a Request object
+        # - or raise IgnoreRequest: process_exception() methods of
+        #   installed downloader middleware will be called
+        return None
+
+    def process_response(self, request, response, spider):
+        # Called with the response returned from the downloader.
+
+        # Must either:
+        # - return a Response object
+        # - return a Request object
+        # - or raise IgnoreRequest
+        return response
+
+    def process_exception(self, request, exception, spider):
+        # Called when a download handler or a process_request()
+        # (from other downloader middleware) raises an exception.
+
+        # Must either:
+        # - return None: continue processing this exception
+        # - return a Response object: stops process_exception() chain
+        # - return a Request object: stops process_exception() chain
+        pass
+
+    def spider_opened(self, spider):
+        spider.logger.info('Spider opened: %s' % spider.name)
diff --git "a/\346\241\210\344\276\21330/mySpider/pipelines.py" "b/\346\241\210\344\276\21330/mySpider/pipelines.py"
new file mode 100644
index 0000000000000000000000000000000000000000..8301175536e7f5e4de74a2751c3f3733cda74758
--- /dev/null
+++ "b/\346\241\210\344\276\21330/mySpider/pipelines.py"
@@ -0,0 +1,35 @@
+# Define your item pipelines here
+#
+# Don't forget to add your pipeline to the ITEM_PIPELINES setting
+# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
+
+
+# useful for handling different item types with a single interface
+from itemadapter import ItemAdapter
+import os
+import csv
+
+
+class MyspiderPipeline:
+
+    def __init__(self):
+        # Target CSV file, stored next to the spiders package
+        store_file = os.path.dirname(__file__) + "/spiders/school1.csv"
+        self.file = open(store_file, "a+", newline='', encoding="utf-8")
+        self.writer = csv.writer(self.file)
+
+    def process_item(self, item, spider):
+        try:
+            # Write one row per item: title, publisher, publish time
+            self.writer.writerow((
+                item["title"],
+                item["userName"],
+                item["createTime"]
+            ))
+        except Exception as e:
+            print(e.args)
+        return item
+
+    def close_spider(self, spider):
+        self.file.close()
+
diff --git "a/\346\241\210\344\276\21330/mySpider/settings.py" "b/\346\241\210\344\276\21330/mySpider/settings.py"
new file mode 100644
index 0000000000000000000000000000000000000000..5f046212cef1e058ab81c730c7276ae95949192a
--- /dev/null
+++ "b/\346\241\210\344\276\21330/mySpider/settings.py"
@@ -0,0 +1,88 @@
+# Scrapy settings for mySpider project
+#
+# For simplicity, this file contains only settings considered important or
+# commonly used. You can find more settings consulting the documentation:
+#
+#     https://docs.scrapy.org/en/latest/topics/settings.html
+#     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
+#     https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+
+BOT_NAME = 'mySpider'
+
+SPIDER_MODULES = ['mySpider.spiders']
+NEWSPIDER_MODULE = 'mySpider.spiders'
+
+
+# Crawl responsibly by identifying yourself (and your website) on the user-agent
+#USER_AGENT = 'mySpider (+http://www.yourdomain.com)'
+
+# Obey robots.txt rules
+ROBOTSTXT_OBEY = False
+
+# Configure maximum concurrent requests performed by Scrapy (default: 16)
+#CONCURRENT_REQUESTS = 32
+
+# Configure a delay for requests for the same website (default: 0)
+# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
+# See also autothrottle settings and docs
+#DOWNLOAD_DELAY = 3
+# The download delay setting will honor only one of:
+#CONCURRENT_REQUESTS_PER_DOMAIN = 16
+#CONCURRENT_REQUESTS_PER_IP = 16
+
+# Disable cookies (enabled by default)
+#COOKIES_ENABLED = False
+
+# Disable Telnet Console (enabled by default)
+#TELNETCONSOLE_ENABLED = False
+
+# Override the default request headers:
+#DEFAULT_REQUEST_HEADERS = {
+#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+#   'Accept-Language': 'en',
+#}
+
+# Enable or disable spider middlewares
+# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+#SPIDER_MIDDLEWARES = {
+#    'mySpider.middlewares.MyspiderSpiderMiddleware': 543,
+#}
+
+# Enable or disable downloader middlewares
+# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
+#DOWNLOADER_MIDDLEWARES = {
+#    'mySpider.middlewares.MyspiderDownloaderMiddleware': 543,
+#}
+
+# Enable or disable extensions
+# See https://docs.scrapy.org/en/latest/topics/extensions.html
+#EXTENSIONS = {
+#    'scrapy.extensions.telnet.TelnetConsole': None,
+#}
+
+# Configure item pipelines
+# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
+ITEM_PIPELINES = {
+    'mySpider.pipelines.MyspiderPipeline': 300,
+}
+
+# Enable and configure the AutoThrottle extension (disabled by default)
+# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
+#AUTOTHROTTLE_ENABLED = True
+# The initial download delay
+#AUTOTHROTTLE_START_DELAY = 5
+# The maximum download delay to be set in case of high latencies
+#AUTOTHROTTLE_MAX_DELAY = 60
+# The average number of requests Scrapy should be sending in parallel to
+# each remote server
+#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
+# Enable showing throttling stats for every response received:
+#AUTOTHROTTLE_DEBUG = False
+
+# Enable and configure HTTP caching (disabled by default)
+# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
+#HTTPCACHE_ENABLED = True
+#HTTPCACHE_EXPIRATION_SECS = 0
+#HTTPCACHE_DIR = 'httpcache'
+#HTTPCACHE_IGNORE_HTTP_CODES = []
+#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
diff --git "a/\346\241\210\344\276\21330/mySpider/spiders/__init__.py" "b/\346\241\210\344\276\21330/mySpider/spiders/__init__.py"
new file mode 100644
index 0000000000000000000000000000000000000000..ebd689ac51d69c5e1dbbe80083c2b20a39f8bb79
--- /dev/null
+++ "b/\346\241\210\344\276\21330/mySpider/spiders/__init__.py"
@@ -0,0 +1,4 @@
+# This package will contain the spiders of your Scrapy project
+#
+# Please refer to the documentation for information on how to create and manage
+# your spiders.
diff --git "a/\346\241\210\344\276\21330/mySpider/spiders/__pycache__/__init__.cpython-38.pyc" "b/\346\241\210\344\276\21330/mySpider/spiders/__pycache__/__init__.cpython-38.pyc" new file mode 100644 index 0000000000000000000000000000000000000000..270d5409944ffef90390a2b46c501947001f2808 Binary files /dev/null and "b/\346\241\210\344\276\21330/mySpider/spiders/__pycache__/__init__.cpython-38.pyc" differ diff --git "a/\346\241\210\344\276\21330/mySpider/spiders/__pycache__/jiansheku.cpython-38.pyc" "b/\346\241\210\344\276\21330/mySpider/spiders/__pycache__/jiansheku.cpython-38.pyc" new file mode 100644 index 0000000000000000000000000000000000000000..3817b06eacc37763aa496eae2de7c0195a303786 Binary files /dev/null and "b/\346\241\210\344\276\21330/mySpider/spiders/__pycache__/jiansheku.cpython-38.pyc" differ diff --git "a/\346\241\210\344\276\21330/mySpider/spiders/__pycache__/jianzhu.cpython-38.pyc" "b/\346\241\210\344\276\21330/mySpider/spiders/__pycache__/jianzhu.cpython-38.pyc" new file mode 100644 index 0000000000000000000000000000000000000000..a422197beb5d8c3aef6c5f1a6d950140bd9327f8 Binary files /dev/null and "b/\346\241\210\344\276\21330/mySpider/spiders/__pycache__/jianzhu.cpython-38.pyc" differ diff --git "a/\346\241\210\344\276\21330/mySpider/spiders/jianzhu.py" "b/\346\241\210\344\276\21330/mySpider/spiders/jianzhu.py" new file mode 100644 index 0000000000000000000000000000000000000000..91f69011d9b3873bbeec4da4d64bbfc10f113ab4 --- /dev/null +++ "b/\346\241\210\344\276\21330/mySpider/spiders/jianzhu.py" @@ -0,0 +1,47 @@ +import scrapy +from scrapy import FormRequest +import json +from items import MyspiderItem + + +class JianshekuSpider(scrapy.Spider): + name = 'jianzhu' + allowed_domains = ['admin.jzda001.com'] + start_url = 'https://admin.jzda001.com/api/core/002--newsList' + + def __init__(self): + self.headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", + "Referer": "hhttps://www.baidu.com/xyzg/" + } + + # 需要重写 start_requests() 方法 + + def start_requests(self): + # 只获取 20 页数据 + for page in range(1, 5): + form_data = { + "type": "1", + "limit": "17", + "pageNo": str(page) + } + + request = FormRequest( + self.start_url, headers=self.headers, formdata=form_data, callback=self.parse) + yield request + + def parse(self, response): + # print(response.body) + # print(response.url) + # print(response.body_as_unicode()) + data = json.loads(response.body_as_unicode()) + data = data["rows"] # 获取数据 + print(data) + for item in data: + school = MyspiderItem() + school["title"] = item["title"] + school["userName"] = item["userName"] + school["createTime"] = item["createTime"] + + # 将获取的数据交给pipelines,pipelines在settings.py中定义 + yield school diff --git "a/\346\241\210\344\276\21330/mySpider/spiders/school1.csv" "b/\346\241\210\344\276\21330/mySpider/spiders/school1.csv" new file mode 100644 index 0000000000000000000000000000000000000000..398ea379da5be76cb51acb99a5d0328eae122384 --- /dev/null +++ "b/\346\241\210\344\276\21330/mySpider/spiders/school1.csv" @@ -0,0 +1,68 @@ +弘阳服务:上半年归属股东净利润5983万元 同比增156%,中国网地产,2021-08-19 22:53:48 +灵感库丨城市中消失的“围墙”,让空间连接生活,Roca Gallery乐家艺术廊,2021-08-19 18:38:13 +【建言︱对话大桥谕】:延续扎哈设计思想,探索未来城市,Roca Gallery乐家艺术廊,2021-08-19 18:38:08 +半年报快读|危中有机 “零踩线”越秀地产稳中求进,中国网地产,2021-08-19 15:14:06 +John Gillen荣升Aedas执行董事,Aedas,2021-08-19 14:30:55 +喜讯丨四项由Aedas设计的项目荣膺2021年国际建筑大奖,Aedas,2021-08-19 14:30:43 +Aedas设计的香港西贡WM酒店携手2021香港小姐美丽同行!,Aedas,2021-08-19 14:30:30 
+李忠观点丨从东亚文化消费发展,看中国国潮崛起(三),华高莱斯,2021-08-19 11:43:23
+喜讯丨承构建筑荣获两项法国Novum Design Award建筑设计类别最高奖项,承构建筑,2021-08-19 09:28:40
+新时代的科学城:你应该了解的6大命题!,TOP创新区研究院,2021-08-19 09:27:34
+此心安处 | 海口罗牛山玖悦台 · 金樾府 铂樾府,奥雅设计LA-2013,2021-08-19 09:27:05
+趋势|芒果雪糕色,戴昆学习小组,2021-08-19 09:24:38
+优客工场企业服务生态集群之 | 省广众烁:打造低成本高品质的数字整合营销服务,优客工场ucommune,2021-08-19 09:24:07
+半年报解读丨时代中国:上半年收入降至136.38亿元 非控股股东权益利润分配成谜,中国网地产,2021-08-19 00:09:43
+卓越商企服务:预计上半年归属股东净利润同比增55%,中国网地产,2021-08-19 00:08:57
+“玺悦相逢 致敬望京” 2021望京国际化发展主题峰会周六启幕,中国网地产,2021-08-20 13:16:00
+喜讯 | GLC两项作品入围英国SBID国际设计大奖Finalist,投票启动!,GLC(中国),2021-08-21 20:14:48
+建业地产:2021年上半年归属股东净利润7.29亿元 同比上升0.3%,中国网地产,2021-08-19 00:08:03
+宝龙地产:上半年归属股东净利润39.08亿元 同比上升约76.1%,中国网地产,2021-08-19 00:07:00
+筑土分享丨联合国第六次气候变化报告发布:地球向人类发出的“红色警报”,筑土国际,2021-08-20 11:27:49
+UNStudio「索契海滨包容性规划设计」被评选为获胜方案,UNStudio,2021-08-20 10:58:13
+澳大利亚墨尔本南岸项目中标三周年!,UNStudio,2021-08-20 10:57:04
+"佳期如约, 荣光而至 | 2021年中项目荣誉合集",致逸设计,2021-08-20 09:24:56
+做产业创新区的五大雷区,TOP创新区研究院,2021-08-20 09:17:56
+金科服务:上半年归属股东净利润5.29亿元 同比增80.3%,中国网地产,2021-08-19 23:03:43
+华发物业服务:预计上半年归属股东净利润同比增长超400%,中国网地产,2021-08-19 23:03:03
+中国宏泰发展:上半年归属股东净利润1.03亿元 同比减少82.23%,中国网地产,2021-08-19 23:02:09
+南国置业:上半年归属股东净亏损4.23亿元,中国网地产,2021-08-19 23:00:21
+远洋集团:上半年归属股东净利润10.10亿元,中国网地产,2021-08-19 22:59:12
+半年报快读|半年收租8亿元 “包租公”SOHO中国的生意场,中国网地产,2021-08-19 22:58:29
+华发股份:上半年归属股东净利润16.87亿元 同比增15.03%,中国网地产,2021-08-19 22:57:46
+南京高科:上半年归属股东净利润14.32亿元 同比增长0.68%,中国网地产,2021-08-19 22:56:36
+荣盛发展:上半年归属股东净利润25.32亿元,中国网地产,2021-08-19 22:55:46
+复星旅游文化:上半年归属股东净亏损20.04亿元,中国网地产,2021-08-19 22:55:09
+商办市场高质量找房网站,速读网六大优势了解一下,优客工场ucommune,2021-08-23 09:23:36
+行村设计 | 藏如明信片般的四季风景 202村温泉民宿,建筑档案,2021-08-20 18:24:23
+谈加薪,先要一杯水;谈离职,要杯星巴克。,那小海,2021-08-20 18:02:47
+新书出版 |《SOM 作品精选》系列丛书最新版:收录过去十年最脍炙人口的设计作品,SOM设计事务所,2021-08-20 17:17:31
+UNStudio赢得韩国忠南美术馆设计竞赛,UNStudio,2021-08-20 10:58:26
+UNStudio 亚洲工作室 | 建成项目,UNStudio,2021-08-18 14:31:26
+重磅嘉宾阵容公布,五大前沿主题蓄势待发 | 2021上海国际建筑文化周,建筑档案,2021-08-18 10:09:07
+独家 | 成都仁恒置地广场将如何改造?,伍兹贝格建筑设计事务所,2021-08-17 09:37:27
+佳兆业美好:上半年归属股东净利润2.09亿元 同比增75.6%,中国网地产,2021-08-19 22:54:29
+政策篇 | 严禁高杠杆企业结构化发债,多地新房销售政策趋严, 建诚晟业,2021-08-23 09:20:11
+GLC | 作品合集,GLC(中国),2021-08-21 20:15:26
+北京住建委:严禁样板间“货不对板”;华润置地前7月合同销售金额约1878.1亿元丨地产财经早餐,中国网地产,2021-08-21 06:45:03
+北京前7月商品房销售面积603.6万平方米;中海物业上半年归属股东净利润3.93亿港元丨地产财经早餐,中国网地产,2021-08-21 06:44:55
+北京加强公租房资格复核及分配管理;雅居乐上半年归属股东净利润52.9亿元丨地产财经早餐,中国网地产,2021-08-21 06:44:37
+河北严禁无证认购、认筹等变相售房行为;荣盛发展上半年归属股东净利润25.32亿元丨地产财经早餐,中国网地产,2021-08-21 06:43:20
+杭州推出“个人自主挂牌房源”线上新渠道,中国网地产,2021-08-21 06:42:50
+奥园健康:上半年归属股东净利润1.77亿元 同比增60.16%,中国网地产,2021-08-21 06:06:23
+亿达中国:上半年归属股东净利润2.71亿元 同比降8.8%,中国网地产,2021-08-21 06:05:13
+三盛控股:上半年归属股东净利润6.25亿元 同比增加293.1%,中国网地产,2021-08-21 06:04:26
+中国奥园:上半年归属股东净利润20.88亿元,中国网地产,2021-08-21 06:03:16
+喜讯 | 阿拓拉斯荣获ELA国际景观大奖,阿拓拉斯(中国)规划·设计,2021-08-16 15:34:22
+在研究 | 从地图看一个华北农村二十多年的变化,在建筑,2021-08-16 10:00:03
+蓝天组Wolf D. Prix:从解构主义到人工智能的“两日之遥”,构筑空间,2021-07-30 10:41:51
+社会住宅不是低品质社区的代名词,低收入者也有享受高质量建筑空间的权利,URBANUS都市实践,2021-07-30 09:22:52
+LA聚焦 | 李雄:公园体检——助力城市公园系统更新,风景园林杂志,2021-07-07 11:12:07
+李忠观点丨紧抓数字游牧民,兑现海岛科技价值,华高莱斯,2021-06-23 17:29:32
+朗诗绿色生活:上半年净利润1444万元 同比减少1.4%,中国网地产,2021-08-21 06:02:09
+上海市财政局:本市契税的适用税率为3%,中国网地产,2021-08-21 05:15:46
+股份过户已获批复 华建控股成为嘉凯城控股股东,中国网地产,2021-08-21 05:14:19
+业绩五年增长十倍 打造绿地企业新名片,中国网地产,2021-08-20 20:27:02
+Archdaily专访BIG出版三部曲,BIG建筑事务所,2021-08-20 17:55:15
+趋势预警|材料美学(下),戴昆学习小组,2021-06-07 09:25:24
+上海新田360广场:浦东腹地的“品质生活社交场”,三益中国,2021-08-23 09:55:44
+趋势|造梦空间,戴昆学习小组,2021-08-23 09:24:06
diff --git "a/\346\241\210\344\276\21330/scrapy.cfg" "b/\346\241\210\344\276\21330/scrapy.cfg"
new file mode 100644
index 0000000000000000000000000000000000000000..002f1d3683675a689b75c30264bd026ad6b45f17
--- /dev/null
+++ "b/\346\241\210\344\276\21330/scrapy.cfg"
@@ -0,0 +1,11 @@
+# Automatically created by: scrapy startproject
+#
+# For more information about the [deploy] section see:
+# https://scrapyd.readthedocs.io/en/latest/deploy.html
+
+[settings]
+default = mySpider.settings
+
+[deploy]
+#url = http://localhost:6800/
+project = mySpider
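
The spider in this diff sends one form POST per page to a JSON endpoint, and when debugging it can help to reproduce that request outside Scrapy. The sketch below is a minimal standalone check using the requests library; the URL, headers, and form fields are copied from jianzhu.py, while the script name check_api.py and the fetch_page() helper are illustrative only, and it assumes (as parse() already does) that the endpoint returns JSON shaped like {"rows": [{"title": ..., "userName": ..., "createTime": ...}]}.

# check_api.py -- standalone sanity check; not part of the project diff above
import requests

URL = "https://admin.jzda001.com/api/core/002--newsList"
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                  "AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/91.0.4472.124 Safari/537.36",
    "Referer": "https://www.baidu.com/xyzg/",
}

def fetch_page(page_no, limit=17):
    # Same form fields the spider sends from start_requests()
    form = {"type": "1", "limit": str(limit), "pageNo": str(page_no)}
    resp = requests.post(URL, data=form, headers=HEADERS, timeout=10)
    resp.raise_for_status()
    # Assumed response shape, mirroring parse(): {"rows": [...]}
    return resp.json().get("rows", [])

if __name__ == "__main__":
    for row in fetch_page(1):
        print(row["title"], row["userName"], row["createTime"])

If the printed rows match the columns in school1.csv, the Scrapy pipeline is receiving the same data the endpoint serves.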