41案例，协程最后一篇

8690fb95 · 梦想橡皮擦 · 1ad7750a · 8690fb95 · 8690fb95 · 8690fb95
5 changed files
--- a/NO41/Semaphore 在协程中的应用.py
+++ b/NO41/Semaphore 在协程中的应用.py
+import time
+
+import asyncio
+import aiohttp
+from bs4 import BeautifulSoup
+
+
+async def get_title(semaphore, url):
+    """Download one listing page and print the event titles found on it.
+
+    Args:
+        semaphore: asyncio semaphore shared by every task. It is held for the
+            whole download-and-parse step, so it bounds how many of these
+            coroutines are inside that step at the same time.
+        url: address of the page to fetch.
+
+    Returns:
+        The list of title strings that was printed (empty when the page
+        contains no element with class ``item-title``).
+    """
+    async with semaphore:
+        print("正在采集：", url)
+        async with aiohttp.request('GET', url) as res:
+            html = await res.text()
+            soup = BeautifulSoup(html, 'html.parser')
+            title_tags = soup.find_all(attrs={'class': 'item-title'})
+            # A matched element without an <a> child yields item.a == None;
+            # skip it rather than failing the whole page with AttributeError.
+            event_names = [item.a.text for item in title_tags if item.a is not None]
+            print(event_names)
+            # Hand the titles back so callers can inspect task results.
+            return event_names
+
+
+async def main():
+    """Crawl pages p0..p110 concurrently, at most 10 coroutines at a time.
+
+    A failure on one page does not stop the others; every failed URL is
+    printed together with the exception once all pages have been attempted.
+    """
+    # The semaphore limits concurrent coroutines (not threads) to 10.
+    semaphore = asyncio.Semaphore(10)
+    urls = ["http://www.lishiju.net/hotevents/p{}".format(i) for i in range(111)]
+    # return_exceptions=True collects each task's exception as its result,
+    # so errors are neither lost nor allowed to abort the remaining pages.
+    outcomes = await asyncio.gather(
+        *(get_title(semaphore, url) for url in urls), return_exceptions=True)
+    for url, outcome in zip(urls, outcomes):
+        if isinstance(outcome, BaseException):
+            print("采集失败：", url, outcome)
+
+
+if __name__ == '__main__':
+    # Time the whole crawl: start the clock, run main() to completion, report.
+    began = time.perf_counter()
+    asyncio.run(main())
+    elapsed = time.perf_counter() - began
+    print("代码运行时间为：", elapsed)
+
+    # Alternative kept from the original (commented out): obtain an event
+    # loop explicitly and run main() on it until it finishes.
+    # event_loop = asyncio.get_event_loop()
+    # event_loop.run_until_complete(main())
+    # Elapsed time recorded by the author: 2.227831242
--- a/NO41/Semaphore 控制信号量.py
+++ b/NO41/Semaphore 控制信号量.py
+import threading
+import time
+
+import requests
+from bs4 import BeautifulSoup
+
+
+class MyThread(threading.Thread):
+    """Thread that downloads one page and prints the event titles on it.
+
+    ``run`` looks up a module-level name ``semaphore``; it must be bound
+    before the thread is started.
+    """
+
+    def __init__(self, url):
+        """Remember the page address this thread will fetch."""
+        threading.Thread.__init__(self)
+        self.__url = url
+
+    def run(self):
+        """Fetch and parse the page while holding one semaphore slot."""
+        # ``with`` decrements the counter on entry and always increments it
+        # on exit. The previous acquire()/release() pair skipped release()
+        # when the request or parsing raised, permanently losing a slot and
+        # potentially blocking the remaining threads forever.
+        with semaphore:
+            print("正在采集：", self.__url)
+            res = requests.get(url=self.__url)
+            soup = BeautifulSoup(res.text, 'html.parser')
+            title_tags = soup.find_all(attrs={'class': 'item-title'})
+            event_names = [item.a.text for item in title_tags]
+            print(event_names)
+            print("")
+
+
+if __name__ == "__main__":
+    # Module-level semaphore read by MyThread.run: at most 5 threads may be
+    # inside the guarded section at once.
+    semaphore = threading.Semaphore(5)
+    start_time = time.perf_counter()
+
+    # range(111) yields page indices 0..110, i.e. 111 threads in total.
+    threads = [
+        MyThread(url="http://www.lishiju.net/hotevents/p{}".format(page))
+        for page in range(111)
+    ]
+
+    # Start every thread first, then wait for each of them to finish.
+    for worker in threads:
+        worker.start()
+    for worker in threads:
+        worker.join()
+
+    print("累计耗时：", time.perf_counter() - start_time)
+    # Elapsed time recorded by the author: 2.8005530640000003
+
+
+
+
--- a/NO41/TCPConnector 限制连接数.py
+++ b/NO41/TCPConnector 限制连接数.py
+import time
+
+import asyncio
+import aiohttp
+from bs4 import BeautifulSoup
+
+
+async def get_title(session, url):
+    """Download one listing page through ``session`` and print its titles.
+
+    Args:
+        session: object whose ``get(url)`` is used as an async context
+            manager yielding the response (an aiohttp client session in
+            this script).
+        url: address of the page to fetch.
+
+    Returns:
+        The list of title strings that was printed (empty when the page
+        contains no element with class ``item-title``).
+    """
+    async with session.get(url) as res:
+        # Printed only after the request context has been entered.
+        print("正在采集：", url)
+        html = await res.text()
+        soup = BeautifulSoup(html, 'html.parser')
+        title_tags = soup.find_all(attrs={'class': 'item-title'})
+        # A matched element without an <a> child yields item.a == None;
+        # skip it rather than failing the whole page with AttributeError.
+        event_names = [item.a.text for item in title_tags if item.a is not None]
+        print(event_names)
+        # Hand the titles back so callers can inspect task results.
+        return event_names
+
+
+
+async def main():
+    """Crawl pages p0..p110 through one session limited to one connection.
+
+    A failure on one page does not stop the others; every failed URL is
+    printed together with the exception once all pages have been attempted.
+    """
+    # limit=1 restricts the connector to a single simultaneous connection.
+    connector = aiohttp.TCPConnector(limit=1)
+    urls = ["http://www.lishiju.net/hotevents/p{}".format(i) for i in range(111)]
+    async with aiohttp.ClientSession(connector=connector) as session:
+        # return_exceptions=True collects each task's exception as its
+        # result, so errors are neither lost nor abort the remaining pages.
+        outcomes = await asyncio.gather(
+            *(get_title(session, url) for url in urls), return_exceptions=True)
+    for url, outcome in zip(urls, outcomes):
+        if isinstance(outcome, BaseException):
+            print("采集失败：", url, outcome)
+
+
+
+if __name__ == '__main__':
+    # Time the whole crawl: start the clock, run main() to completion, report.
+    began = time.perf_counter()
+    asyncio.run(main())
+    elapsed = time.perf_counter() - began
+    print("代码运行时间为：", elapsed)
--- a/NO41/普通多线程.py
+++ b/NO41/普通多线程.py
+import threading
+import time
+
+import requests
+from bs4 import BeautifulSoup
+
+
+class MyThread(threading.Thread):
+    """Thread that downloads one page and prints the event titles on it."""
+
+    def __init__(self, url):
+        """Remember the page address this thread will fetch."""
+        super().__init__()
+        self.__url = url
+
+    def run(self):
+        """Fetch the page, collect the link text of each title, print it."""
+        target = self.__url
+        print("正在采集：", target)
+        response = requests.get(url=target)
+        document = BeautifulSoup(response.text, 'html.parser')
+        # Every element carrying the class "item-title" is one entry; the
+        # text of its first <a> child is taken as the title.
+        event_names = []
+        for entry in document.find_all(attrs={'class': 'item-title'}):
+            event_names.append(entry.a.text)
+        print(event_names)
+        print("")
+
+
+
+if __name__ == "__main__":
+    start_time = time.perf_counter()
+
+    # range(111) yields page indices 0..110, i.e. 111 threads in total,
+    # with no limit on how many run at once.
+    threads = [
+        MyThread(url="http://www.lishiju.net/hotevents/p{}".format(page))
+        for page in range(111)
+    ]
+
+    # Start every thread first, then wait for each of them to finish.
+    for worker in threads:
+        worker.start()
+    for worker in threads:
+        worker.join()
+
+    print("累计耗时：", time.perf_counter() - start_time)
+    # Elapsed time recorded by the author: 1.537718624
--- a/README.md
+++ b/README.md
@@ -79,11 +79,11 @@

 37. [python 爬虫爱好者必须掌握的知识点“ 协程爬虫”，看一下如何用 gevent 采集女生用头像](https://dream.blog.csdn.net/article/details/120421824)
 38. [python协程总学不会？不可能的，边学协程边采集Coser图吧！](https://dream.blog.csdn.net/article/details/120445004)
-39. 中少绘本 MP4 视频采集，asyncio 协程第3篇
-40. Bensound 站 MP3 采集，asyncio + aiohttp 协程第4篇
-41. 历史剧网采集，协程并发控制
-
+39. [你是不是已经成为【爸爸程序员】了？用Python给自己的宝下载200+绘本动画吧，协程第3遍学习](https://dream.blog.csdn.net/article/details/120463479)
+40. [python 协程第4课，目标数据源为 mp3 ，目标站点为 bensound.com](https://dream.blog.csdn.net/article/details/120507981)
+41. [python 协程补个知识点，控制并发数，python 数据采集必会技能](https://dream.blog.csdn.net/article/details/120879805)

+### 📘 scrapy 库学习