未验证 提交 897891ca 编写于 作者: R Roy Binux 提交者: GitHub

Merge pull request #955 from binux/fix-test

Fix test
sudo: required
language: python
cache: pip
python:
- 3.4
- 3.5
- 3.6
- 3.7
......@@ -11,29 +9,25 @@ services:
- docker
- mongodb
- rabbitmq
- redis-server
- redis
- mysql
#- elasticsearch
# - elasticsearch
- postgresql
addons:
postgresql: "9.4"
apt:
packages:
- rabbitmq-server
env:
- IGNORE_COUCHDB=1
before_install:
- echo "deb https://apache.bintray.com/couchdb-deb xenial main" | sudo tee -a /etc/apt/sources.list
- curl -L https://couchdb.apache.org/repo/bintray-pubkey.asc | sudo apt-key add -
- sudo apt-get update -qq
- sudo apt-get install -y couchdb
- sudo systemctl start couchdb
- curl -O https://download.elastic.co/elasticsearch/release/org/elasticsearch/distribution/deb/elasticsearch/2.4.0/elasticsearch-2.4.0.deb && sudo dpkg -i --force-confnew elasticsearch-2.4.0.deb && sudo service elasticsearch restart
- npm install express puppeteer
- sudo docker pull scrapinghub/splash
- sudo docker run -d --net=host scrapinghub/splash
before_script:
- curl -X PUT http://127.0.0.1:5984/_users
- curl -X PUT http://127.0.0.1:5984/_replicator
- psql -c "CREATE DATABASE pyspider_test_taskdb ENCODING 'UTF8' TEMPLATE=template0;" -U postgres
- psql -c "CREATE DATABASE pyspider_test_projectdb ENCODING 'UTF8' TEMPLATE=template0;" -U postgres
- psql -c "CREATE DATABASE pyspider_test_resultdb ENCODING 'UTF8' TEMPLATE=template0;" -U postgres
......
......@@ -5,7 +5,7 @@ A Powerful Spider(Web Crawler) System in Python.
- Write script in Python
- Powerful WebUI with script editor, task monitor, project manager and result viewer
- [MySQL](https://www.mysql.com/), [CouchDB](https://couchdb.apache.org), [MongoDB](https://www.mongodb.org/), [Redis](http://redis.io/), [SQLite](https://www.sqlite.org/), [Elasticsearch](https://www.elastic.co/products/elasticsearch); [PostgreSQL](http://www.postgresql.org/) with [SQLAlchemy](http://www.sqlalchemy.org/) as database backend
- [MySQL](https://www.mysql.com/), [MongoDB](https://www.mongodb.org/), [Redis](http://redis.io/), [SQLite](https://www.sqlite.org/), [Elasticsearch](https://www.elastic.co/products/elasticsearch); [PostgreSQL](http://www.postgresql.org/) with [SQLAlchemy](http://www.sqlalchemy.org/) as database backend
- [RabbitMQ](http://www.rabbitmq.com/), [Redis](http://redis.io/) and [Kombu](http://kombu.readthedocs.org/) as message queue
- Task priority, retry, periodic scheduling, recrawl by age, etc.
- Distributed architecture, crawling of JavaScript pages, Python 2.{6,7}, 3.{3,4,5,6} support, etc.
......
......@@ -13,26 +13,15 @@ services:
networks:
- pyspider
command: rabbitmq-server
couchdb:
image: couchdb:latest
container_name: couchdb
mysql:
image: mysql:latest
container_name: mysql
volumes:
- /tmp:/var/lib/mysql
environment:
- COUCHDB_USER=user
- COUCHDB_PASSWORD=password
- MYSQL_ALLOW_EMPTY_PASSWORD=yes
networks:
- pyspider
ports:
- "5984:5984"
# OR we can replace couchdb with mysql
#mysql:
# image: mysql:latest
# container_name: mysql
# volumes:
# - /tmp:/var/lib/mysql
# environment:
# - MYSQL_ALLOW_EMPTY_PASSWORD=yes
# networks:
# - pyspider
phantomjs:
image: pyspider:latest
container_name: phantomjs
......
......@@ -214,26 +214,8 @@ def _connect_couchdb(parsed, dbtype, url):
params = {}
# default to env, then url, then hard coded
params['username'] = os.environ.get('COUCHDB_USER') or parsed.username or 'user'
params['password'] = os.environ.get('COUCHDB_PASSWORD') or parsed.password or 'password'
# create necessary DBs + the admin user
res = requests.put(url + "_users")
if 'error' in res and res['error'] == 'unauthorized':
# user is already created. This will happen if CouchDB is running in docker
# and COUCHDB_USER and COUCHDB_PASSWORD are set
from requests.auth import HTTPBasicAuth
requests.put(url + "_users",
auth=HTTPBasicAuth(params['username'], params['password']))
requests.put(url + "_replicator",
auth=HTTPBasicAuth(params['username'], params['password']))
requests.put(url + '_node/_local/_config/admins/' + params['username'],
data=params['password'],
auth=HTTPBasicAuth(params['username'], params['password']))
else:
requests.put(url + "_replicator")
requests.put(url + '_node/_local/_config/admins/' + params['username'],
data=params['password'])
params['username'] = os.environ.get('COUCHDB_USER') or parsed.username
params['password'] = os.environ.get('COUCHDB_PASSWORD') or parsed.password
if dbtype == 'taskdb':
from .couchdb.taskdb import TaskDB
......
......@@ -4,6 +4,12 @@ from requests.auth import HTTPBasicAuth
class SplitTableMixin(object):
UPDATE_PROJECTS_TIME = 10 * 60
def __init__(self):
self.session = requests.session()
if self.username:
self.session.auth = HTTPBasicAuth(self.username, self.password)
self.session.headers.update({'Content-Type': 'application/json'})
def _collection_name(self, project):
if self.collection_prefix:
return "%s_%s" % (self.collection_prefix, project)
......@@ -32,10 +38,7 @@ class SplitTableMixin(object):
prefix = ''
url = self.base_url + "_all_dbs"
res = requests.get(url,
data=json.dumps({}),
headers={"Content-Type": "application/json"},
auth=HTTPBasicAuth(self.username, self.password)).json()
res = self.session.get(url, json={}).json()
for each in res:
if each.startswith('_'):
continue
......@@ -45,9 +48,7 @@ class SplitTableMixin(object):
def create_database(self, name):
    """Create the CouchDB database ``name`` under ``self.base_url``.

    Issues a single PUT through ``self.session`` (the shared session set up
    in ``__init__``, which carries the auth and Content-Type header), so no
    per-call ``HTTPBasicAuth`` is built from possibly-``None`` credentials.

    :param name: database name appended to ``self.base_url``.
    :returns: the decoded JSON body of the PUT response.
    :raises Exception: when the server answers with ``error == 'unauthorized'``.
    """
    url = self.base_url + name
    res = self.session.put(url).json()
    if 'error' in res and res['error'] == 'unauthorized':
        # NOTE(review): this message embeds the plaintext password; consider
        # redacting it here and in the matching ProjectDB message.
        raise Exception("Supplied credentials are incorrect. Reason: {} for User: {} Password: {}".format(res['reason'], self.username, self.password))
    return res
......@@ -55,9 +56,7 @@ class SplitTableMixin(object):
def get_doc(self, db_name, doc_id):
    """Fetch one document by id from database ``db_name``.

    Uses the shared ``self.session`` for a single GET instead of building
    headers and auth on every call.

    :param db_name: database name appended to ``self.base_url``.
    :param doc_id: document id appended after ``db_name + "/"``.
    :returns: the decoded JSON document, or ``None`` when the server reports
        ``error == "not_found"``.
    """
    url = self.base_url + db_name + "/" + doc_id
    res = self.session.get(url).json()
    if "error" in res and res["error"] == "not_found":
        return None
    return res
......@@ -66,10 +65,7 @@ class SplitTableMixin(object):
def get_docs(self, db_name, selector):
    """Run a ``_find`` query against database ``db_name``.

    The query is POSTed once through ``self.session``. ``selector`` is copied
    before ``use_index`` is added, so the caller's dict is not modified.

    :param db_name: database name appended to ``self.base_url``.
    :param selector: query body (a dict) to send; ``use_index`` is set to
        ``self.index`` on the copy that is sent.
    :returns: the ``docs`` list from the response, or ``[]`` when the server
        reports ``error == 'not_found'``.
    """
    url = self.base_url + db_name + "/_find"
    # Shallow copy: only the top-level 'use_index' key is added.
    query = dict(selector)
    query['use_index'] = self.index
    res = self.session.post(url, json=query).json()
    if 'error' in res and res['error'] == 'not_found':
        return []
    return res['docs']
......@@ -81,10 +77,7 @@ class SplitTableMixin(object):
def insert_doc(self, db_name, doc_id, doc):
    """Store ``doc`` under ``doc_id`` in database ``db_name``.

    Sends one PUT through the shared ``self.session`` with ``doc`` as the
    JSON body.

    :param db_name: database name appended to ``self.base_url``.
    :param doc_id: document id appended after ``db_name + "/"``.
    :param doc: JSON-serialisable document body.
    :returns: the decoded JSON body of the PUT response.
    """
    url = self.base_url + db_name + "/" + doc_id
    return self.session.put(url, json=doc).json()
def update_doc(self, db_name, doc_id, new_doc):
......@@ -94,14 +87,9 @@ class SplitTableMixin(object):
for key in new_doc:
doc[key] = new_doc[key]
url = self.base_url + db_name + "/" + doc_id
return requests.put(url,
data=json.dumps(doc),
headers={"Content-Type": "application/json"},
auth=HTTPBasicAuth(self.username, self.password)).json()
return self.session.put(url, json=doc).json()
def delete(self, url):
    """Send a DELETE to ``url`` through the shared session.

    :param url: full URL to delete; it is used as given.
    :returns: the decoded JSON body of the DELETE response.
    """
    return self.session.delete(url).json()
......@@ -6,17 +6,19 @@ from pyspider.database.base.projectdb import ProjectDB as BaseProjectDB
class ProjectDB(BaseProjectDB):
__collection_name__ = 'projectdb'
def __init__(self, url, database='projectdb', username='username', password='password'):
def __init__(self, url, database='projectdb', username=None, password=None):
self.username = username
self.password = password
self.url = url + self.__collection_name__ + "_" + database + "/"
self.database = database
self.insert('', {})
self.session = requests.session()
if username:
self.session.auth = HTTPBasicAuth(self.username, self.password)
self.session.headers.update({'Content-Type': 'application/json'})
# Create the db
res = requests.put(self.url,
headers={"Content-Type": "application/json"},
auth=HTTPBasicAuth(self.username, self.password)).json()
res = self.session.put(self.url).json()
if 'error' in res and res['error'] == 'unauthorized':
raise Exception(
"Supplied credentials are incorrect. Reason: {} for User: {} Password: {}".format(res['reason'],
......@@ -29,9 +31,7 @@ class ProjectDB(BaseProjectDB):
},
'name': self.__collection_name__ + "_" + database
}
res = requests.post(self.url+"_index", data=json.dumps(payload),
headers={"Content-Type": "application/json"},
auth=HTTPBasicAuth(self.username, self.password)).json()
res = self.session.post(self.url + "_index", json=payload).json()
self.index = res['id']
def _default_fields(self, each):
......@@ -51,10 +51,7 @@ class ProjectDB(BaseProjectDB):
obj = dict(obj)
obj['name'] = name
obj['updatetime'] = time.time()
res = requests.put(url,
data = json.dumps(obj),
headers = {"Content-Type": "application/json"},
auth=HTTPBasicAuth(self.username, self.password)).json()
res = self.session.put(url, json=obj).json()
return res
def update(self, name, obj={}, **kwargs):
......@@ -78,10 +75,7 @@ class ProjectDB(BaseProjectDB):
"use_index": self.index
}
url = self.url + "_find"
res = requests.post(url,
data=json.dumps(payload),
headers={"Content-Type": "application/json"},
auth=HTTPBasicAuth(self.username, self.password)).json()
res = self.session.post(url, json=payload).json()
for doc in res['docs']:
yield self._default_fields(doc)
......@@ -95,10 +89,7 @@ class ProjectDB(BaseProjectDB):
"use_index": self.index
}
url = self.url + "_find"
res = requests.post(url,
data=json.dumps(payload),
headers={"Content-Type": "application/json"},
auth=HTTPBasicAuth(self.username, self.password)).json()
res = self.session.post(url, json=payload).json()
if len(res['docs']) == 0:
return None
return self._default_fields(res['docs'][0])
......@@ -115,13 +106,7 @@ class ProjectDB(BaseProjectDB):
doc = self.get(name)
payload = {"rev": doc["_rev"]}
url = self.url + name
return requests.delete(url,
params=payload,
headers={"Content-Type": "application/json"},
auth=HTTPBasicAuth(self.username, self.password)).json()
return self.session.delete(url, params=payload).json()
def drop_database(self):
return requests.delete(self.url,
headers={"Content-Type": "application/json"},
auth=HTTPBasicAuth(self.username, self.password)).json()
return self.session.delete(self.url).json()
import time, json, requests
from requests.auth import HTTPBasicAuth
import time, json
from pyspider.database.base.resultdb import ResultDB as BaseResultDB
from .couchdbbase import SplitTableMixin
......@@ -7,13 +6,14 @@ from .couchdbbase import SplitTableMixin
class ResultDB(SplitTableMixin, BaseResultDB):
collection_prefix = ''
def __init__(self, url, database='resultdb', username='username', password='password'):
def __init__(self, url, database='resultdb', username=None, password=None):
self.username = username
self.password = password
self.base_url = url
self.url = url + database + "/"
self.database = database
super().__init__()
self.create_database(database)
self.index = None
......@@ -31,10 +31,7 @@ class ResultDB(SplitTableMixin, BaseResultDB):
'name': collection_name
}
res = requests.post(self.base_url + collection_name + "/_index",
data=json.dumps(payload),
headers={"Content-Type": "application/json"},
auth=HTTPBasicAuth(self.username, self.password)).json()
res = self.session.post(self.base_url + collection_name + "/_index", json=payload).json()
self.index = res['id']
self._list_project()
......
import json, time, requests
from requests.auth import HTTPBasicAuth
import json, time
from pyspider.database.base.taskdb import TaskDB as BaseTaskDB
from .couchdbbase import SplitTableMixin
......@@ -7,15 +6,17 @@ from .couchdbbase import SplitTableMixin
class TaskDB(SplitTableMixin, BaseTaskDB):
collection_prefix = ''
def __init__(self, url, database='taskdb', username='username', password='password'):
def __init__(self, url, database='taskdb', username=None, password=None):
self.username = username
self.password = password
self.base_url = url
self.url = url + database + "/"
self.database = database
self.create_database(database)
self.index = None
super().__init__()
self.create_database(database)
self.projects = set()
self._list_project()
......@@ -32,10 +33,7 @@ class TaskDB(SplitTableMixin, BaseTaskDB):
},
'name': collection_name
}
res = requests.post(self.base_url + collection_name + "/_index",
data=json.dumps(payload),
headers={"Content-Type": "application/json"},
auth=HTTPBasicAuth(self.username, self.password)).json()
res = self.session.post(self.base_url + collection_name + "/_index", json=payload).json()
self.index = res['id']
self._list_project()
......
......@@ -432,9 +432,9 @@ def python_console(namespace=None):
def check_port_open(port, addr='127.0.0.1'):
    """Report whether a TCP connection to ``(addr, port)`` succeeds.

    The probe socket is used as a context manager so it is closed on every
    path; the earlier form created the socket and never closed it.

    :param port: TCP port number to probe.
    :param addr: host address to probe; defaults to ``'127.0.0.1'``.
    :returns: ``True`` when ``connect_ex`` returns 0, otherwise ``False``.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        # connect_ex returns an error code instead of raising; 0 means the
        # connection was established.
        return sock.connect_ex((addr, port)) == 0
Flask==0.10
Jinja2==2.7
chardet==2.2.1
chardet==3.0.4
cssselect==0.9
lxml==4.3.3
pycurl==7.43.0.3
pyquery==1.4.0
requests==2.2
requests==2.24.0
tornado==4.5.3
mysql-connector-python==8.0.16
pika==1.1.0
......
......@@ -20,25 +20,21 @@ import pyspider
# Runtime dependencies, one pin per package.
# - chardet and requests keep only the newer pins (3.0.4 / 2.24.0); the CouchDB
#   backend passes the `json=` keyword to requests, which the old 2.2 pin
#   predates.
# - 'tblib==1.4.0' appears once, with a trailing comma, so it is not glued to
#   the next string literal.
# - wsgidav, tornado and pyquery are listed unconditionally; the former
#   `sys.version_info >= (3, 0)` extend block is gone, since it would add
#   them a second time.
install_requires = [
    'Flask==0.10',
    'Jinja2==2.7',
    'chardet==3.0.4',
    'cssselect==0.9',
    'lxml==4.3.3',
    'pycurl==7.43.0.3',
    'requests==2.24.0',
    'Flask-Login==0.2.11',
    'u-msgpack-python==1.6',
    'click==3.3',
    'six==1.10.0',
    'tblib==1.4.0',
    'wsgidav==2.3.0',
    'tornado>=3.2,<=4.5.3',
    'pyquery',
]
extras_require_all = [
'mysql-connector-python==8.0.16',
'pymongo==3.9.0',
......@@ -46,15 +42,11 @@ extras_require_all = [
'redis-py-cluster==1.3.6',
'psycopg2==2.8.2',
'elasticsearch==2.3.0',
'kombu==4.4.0',
'amqp==2.4.0',
'SQLAlchemy==1.3.10',
'pika==1.1.0'
]
if sys.version_info >= (3, 0): # 3.*
extras_require_all.extend([
'kombu==4.4.0',
'amqp==2.4.0',
'SQLAlchemy==1.3.10',
'pika==1.1.0'
])
setup(
name='pyspider',
......@@ -72,9 +64,6 @@ setup(
classifiers=[
'Development Status :: 4 - Beta',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
......@@ -100,7 +89,8 @@ setup(
'all': extras_require_all,
'test': [
'coverage',
'httpbin<=0.5.0',
'Werkzeug==0.16.1',
'httpbin==0.7.0',
'pyproxy==0.1.6',
'easywebdav==1.2.0',
]
......
......@@ -697,11 +697,6 @@ class TestCouchDBProjectDB(ProjectDBCase, unittest.TestCase):
@classmethod
def setUpClass(self):
# create a test admin user
import requests
requests.put('http://localhost:5984/_node/_local/_config/admins/test',
data='"password"')
os.environ["COUCHDB_USER"] = "test"
os.environ["COUCHDB_PASSWORD"] = "password"
self.projectdb = database.connect_database(
'couchdb+projectdb://localhost:5984/'
)
......@@ -710,12 +705,6 @@ class TestCouchDBProjectDB(ProjectDBCase, unittest.TestCase):
@classmethod
def tearDownClass(self):
# remove the test admin user
import requests
from requests.auth import HTTPBasicAuth
requests.delete('http://localhost:5984/_node/_local/_config/admins/test',
auth=HTTPBasicAuth('test', 'password'))
del os.environ["COUCHDB_USER"]
del os.environ["COUCHDB_PASSWORD"]
self.projectdb.drop_database()
......@@ -725,11 +714,6 @@ class TestCouchDBResultDB(ResultDBCase, unittest.TestCase):
@classmethod
def setUpClass(self):
# create a test admin user
import requests
requests.put('http://localhost:5984/_node/_local/_config/admins/test',
data='"password"')
os.environ["COUCHDB_USER"] = "test"
os.environ["COUCHDB_PASSWORD"] = "password"
self.resultdb = database.connect_database(
'couchdb+resultdb://localhost:5984/'
)
......@@ -738,12 +722,6 @@ class TestCouchDBResultDB(ResultDBCase, unittest.TestCase):
@classmethod
def tearDownClass(self):
# remove the test admin user
import requests
from requests.auth import HTTPBasicAuth
requests.delete('http://localhost:5984/_node/_local/_config/admins/test',
auth=HTTPBasicAuth('test', 'password'))
del os.environ["COUCHDB_USER"]
del os.environ["COUCHDB_PASSWORD"]
self.resultdb.drop_database()
def test_create_project(self):
......@@ -759,10 +737,6 @@ class TestCouchDBTaskDB(TaskDBCase, unittest.TestCase):
def setUpClass(self):
# create a test admin user
import requests
requests.put('http://localhost:5984/_node/_local/_config/admins/test',
data='"password"')
os.environ["COUCHDB_USER"] = "test"
os.environ["COUCHDB_PASSWORD"] = "password"
self.taskdb = database.connect_database(
'couchdb+taskdb://localhost:5984/'
)
......@@ -773,10 +747,6 @@ class TestCouchDBTaskDB(TaskDBCase, unittest.TestCase):
# remove the test admin user
import requests
from requests.auth import HTTPBasicAuth
requests.delete('http://localhost:5984/_node/_local/_config/admins/test',
auth=HTTPBasicAuth('test', 'password'))
del os.environ["COUCHDB_USER"]
del os.environ["COUCHDB_PASSWORD"]
self.taskdb.drop_database()
def test_create_project(self):
......
......@@ -156,14 +156,9 @@ class TestRun(unittest.TestCase):
def test_60a_docker_couchdb(self):
try:
# create a test admin user
import requests
requests.put('http://localhost:5984/_node/_local/_config/admins/test',
data='"password"')
os.environ['COUCHDB_NAME'] = 'couchdb'
os.environ['COUCHDB_PORT_5984_TCP_ADDR'] = 'localhost'
os.environ['COUCHDB_PORT_5984_TCP_PORT'] = '5984'
os.environ["COUCHDB_USER"] = "test"
os.environ["COUCHDB_PASSWORD"] = "password"
ctx = run.cli.make_context('test', [], None,
obj=dict(testing_mode=True))
ctx = run.cli.invoke(ctx)
......@@ -172,15 +167,9 @@ class TestRun(unittest.TestCase):
self.assertIsNone(e)
finally:
# remove the test admin user
import requests
from requests.auth import HTTPBasicAuth
requests.delete('http://localhost:5984/_node/_local/_config/admins/test',
auth=HTTPBasicAuth('test', 'password'))
del os.environ['COUCHDB_NAME']
del os.environ['COUCHDB_PORT_5984_TCP_ADDR']
del os.environ['COUCHDB_PORT_5984_TCP_PORT']
del os.environ["COUCHDB_USER"]
del os.environ["COUCHDB_PASSWORD"]
@unittest.skip('only available in docker')
@unittest.skipIf(os.environ.get('IGNORE_MYSQL') or os.environ.get('IGNORE_ALL'), 'no mysql server for test.')
......
[tox]
envlist = py26,py27,py33,py34,py35
envlist = py35,py36,py37,py38
[testenv]
install_command =
pip install --allow-all-external 'https://dev.mysql.com/get/Downloads/Connector-Python/mysql-connector-python-2.1.5.zip#md5=ce4a24cb1746c1c8f6189a97087f21c1' {opts} -e .[all,test] {packages}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册