提交 89d1cfad 编写于 作者: 梦想橡皮擦's avatar 梦想橡皮擦 💬

gevent

上级 ecc073a0
# Apply gevent's monkey patches first, ahead of the remaining imports, so the
# modules imported below (threading, requests, ...) are loaded after the
# standard library has been patched.
from gevent import monkey
monkey.patch_all()
import threading
from bs4 import BeautifulSoup
import gevent
import requests
import lxml
def get_page(this_urls):
    """Consume a shared list of page URLs and save every avatar image found.

    Several greenlets may be given the *same* list; each one pops URLs from
    it until nothing is left, so the list is emptied in place.

    Args:
        this_urls: Mutable list of listing-page URLs. ``None`` or an empty
            list makes the worker return immediately.

    Returns:
        None.
    """
    # Characters removed from the ``alt`` text before it is used as a file
    # name, to avoid invalid paths.
    strip_table = str.maketrans('', '', '/+?*')

    # Stop on an *empty* list as well as on None: the list never turns into
    # None by itself, so checking only for None made pop() raise IndexError
    # as soon as the queue was drained.
    while this_urls:
        url = this_urls.pop()
        print('正在抓取:{},当前的虚拟线程为:{}'.format(url, threading.current_thread().name))

        try:
            res = requests.get(url=url, timeout=10)
        except requests.RequestException as e:
            # One unreachable page must not kill the whole worker.
            print('抓取失败:{},原因:{}'.format(url, e))
            continue

        res.encoding = "gb2312"
        if res.status_code != 200:
            continue

        soup = BeautifulSoup(res.text, 'lxml')
        content = soup.find(attrs={'class': 'g-gxlist-imgbox'})
        if content is None:
            # Page layout differs or the list container is missing.
            continue

        for img_tag in content.find_all('img'):
            img_src = img_tag.get('src')
            alt_text = img_tag.get('alt')
            if not img_src or alt_text is None:
                # Without a source URL or a name there is nothing to save.
                continue

            name = alt_text.translate(strip_table)
            try:
                save_img(img_src, name)
            except OSError as e:
                # Covers file-system errors and requests' exceptions
                # (RequestException derives from IOError/OSError).
                print('保存失败:{},原因:{}'.format(img_src, e))
                continue
def save_img(img_src, name):
    """Download one image and write it to ``imgs/<name>.jpg``.

    Args:
        img_src: URL of the image to download.
        name: File name without extension; the caller is expected to have
            stripped characters that are not valid in a path.

    Returns:
        None. Nothing is written when the server does not answer with 200.
    """
    # Function-scope import keeps this block self-contained without touching
    # the import order at the top of the file (monkey patching comes first).
    import os

    res = requests.get(img_src, timeout=10)
    if res.status_code != 200:
        # Do not store an error page under a .jpg name.
        return

    # The target directory may not exist on a fresh checkout.
    os.makedirs('imgs', exist_ok=True)
    with open(f'imgs/{name}.jpg', mode='wb') as f:
        f.write(res.content)
if __name__ == '__main__':
    # Listing pages 1 through 243; the list is shared by every worker.
    urls = [f"https://www.qqtn.com/tx/nvshengtx_{page}.html" for page in range(1, 244)]

    # Start 5 coroutines that all pull from the same URL list.
    workers = []
    for _ in range(5):
        workers.append(gevent.spawn(get_page, urls))

    # Block until every worker has finished.
    gevent.joinall(workers)
    print("爬取完毕")
......@@ -77,7 +77,7 @@
### 📙 协程学习
37. 腾牛个性网QQ头像采集,gevent 模块
37. [python 爬虫爱好者必须掌握的知识点“ 协程爬虫”,看一下如何用 gevent 采集女生用头像](https://dream.blog.csdn.net/article/details/120421824)
38. 麦涩网 MyCoser|cosplay 采集,asyncio 库学习
39. 中少绘本 MP4 视频采集,asyncio 协程第3篇
40. Bensound 站 MP3 采集,asyncio + aiohttp 协程第4篇
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册