From 8690fb957df4980e70dbc709798aaf1691bd10f1 Mon Sep 17 00:00:00 2001
From: hihell
Date: Thu, 21 Oct 2021 10:11:43 +0800
Subject: [PATCH] Case 41, the last article on coroutines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ...55\347\232\204\345\272\224\347\224\250.py" | 38 +++++++++++++++++
 ...66\344\277\241\345\217\267\351\207\217.py" | 42 +++++++++++++++++++
 ...66\350\277\236\346\216\245\346\225\260.py" | 31 ++++++++++++++
 ...32\345\244\232\347\272\277\347\250\213.py" | 38 +++++++++++++++++
 README.md                                     |  8 ++--
 5 files changed, 153 insertions(+), 4 deletions(-)
 create mode 100644 "NO41/Semaphore \345\234\250\345\215\217\347\250\213\344\270\255\347\232\204\345\272\224\347\224\250.py"
 create mode 100644 "NO41/Semaphore \346\216\247\345\210\266\344\277\241\345\217\267\351\207\217.py"
 create mode 100644 "NO41/TCPConnector \351\231\220\345\210\266\350\277\236\346\216\245\346\225\260.py"
 create mode 100644 "NO41/\346\231\256\351\200\232\345\244\232\347\272\277\347\250\213.py"

diff --git "a/NO41/Semaphore \345\234\250\345\215\217\347\250\213\344\270\255\347\232\204\345\272\224\347\224\250.py" "b/NO41/Semaphore \345\234\250\345\215\217\347\250\213\344\270\255\347\232\204\345\272\224\347\224\250.py"
new file mode 100644
index 0000000..64291ad
--- /dev/null
+++ "b/NO41/Semaphore \345\234\250\345\215\217\347\250\213\344\270\255\347\232\204\345\272\224\347\224\250.py"
@@ -0,0 +1,38 @@
+import time
+
+import asyncio
+import aiohttp
+from bs4 import BeautifulSoup
+
+
+async def get_title(semaphore, url):
+    async with semaphore:
+        print("Crawling:", url)
+        async with aiohttp.request('GET', url) as res:
+            html = await res.text()
+            soup = BeautifulSoup(html, 'html.parser')
+            title_tags = soup.find_all(attrs={'class': 'item-title'})
+            event_names = [item.a.text for item in title_tags]
+            print(event_names)
+
+
+async def main():
+    semaphore = asyncio.Semaphore(10)  # allow at most 10 coroutines to run concurrently
+    tasks = [asyncio.ensure_future(get_title(semaphore, "http://www.lishiju.net/hotevents/p{}".format(i))) for i in
+             range(111)]
+    dones, pendings = await asyncio.wait(tasks)
+    # for task in dones:
+    #     print(len(task.result()))
+
+
+if __name__ == '__main__':
+
+    start_time = time.perf_counter()
+    asyncio.run(main())
+    print("Code run time:", time.perf_counter() - start_time)
+
+    # # Create the event loop.
+    # event_loop = asyncio.get_event_loop()
+    # # Start the event loop and wait for the coroutine main() to finish.
+    # event_loop.run_until_complete(main())
+    # # Code run time: 2.227831242
diff --git "a/NO41/Semaphore \346\216\247\345\210\266\344\277\241\345\217\267\351\207\217.py" "b/NO41/Semaphore \346\216\247\345\210\266\344\277\241\345\217\267\351\207\217.py"
new file mode 100644
index 0000000..d1dc944
--- /dev/null
+++ "b/NO41/Semaphore \346\216\247\345\210\266\344\277\241\345\217\267\351\207\217.py"
@@ -0,0 +1,42 @@
+import threading
+import time
+
+import requests
+from bs4 import BeautifulSoup
+
+
+class MyThread(threading.Thread):
+    def __init__(self, url):
+        threading.Thread.__init__(self)
+        self.__url = url
+
+    def run(self):
+        if semaphore.acquire():  # counter -1; blocks while the counter is 0
+            print("Crawling:", self.__url)
+            res = requests.get(url=self.__url)
+            soup = BeautifulSoup(res.text, 'html.parser')
+            title_tags = soup.find_all(attrs={'class': 'item-title'})
+            event_names = [item.a.text for item in title_tags]
+            print(event_names)
+            print("")
+            semaphore.release()  # counter +1
+
+
+if __name__ == "__main__":
+    semaphore = threading.Semaphore(5)  # allow at most 5 threads to run concurrently
+    start_time = time.perf_counter()
+    threads = []
+    for i in range(111):  # create 111 threads (pages 0 through 110)
+        threads.append(MyThread(url="http://www.lishiju.net/hotevents/p{}".format(i)))
+    for t in threads:
+        t.start()  # start all 111 threads
+
+    for t in threads:
+        t.join()  # wait for every thread to finish
+
+    print("Total time elapsed:", time.perf_counter() - start_time)
+    # Total time elapsed: 2.8005530640000003
+
+
+
+
diff --git "a/NO41/TCPConnector \351\231\220\345\210\266\350\277\236\346\216\245\346\225\260.py" "b/NO41/TCPConnector \351\231\220\345\210\266\350\277\236\346\216\245\346\225\260.py"
new file mode 100644
index 0000000..a1146e5
--- /dev/null
+++ "b/NO41/TCPConnector \351\231\220\345\210\266\350\277\236\346\216\245\346\225\260.py"
@@ -0,0 +1,31 @@
+import time
+
+import asyncio
+import aiohttp
+from bs4 import BeautifulSoup
+
+
+async def get_title(session, url):
+    async with session.get(url) as res:
+        print("Crawling:", url)
+        html = await res.text()
+        soup = BeautifulSoup(html, 'html.parser')
+        title_tags = soup.find_all(attrs={'class': 'item-title'})
+        event_names = [item.a.text for item in title_tags]
+        print(event_names)
+
+
+
+async def main():
+    connector = aiohttp.TCPConnector(limit=1)  # limit the number of simultaneous connections
+    async with aiohttp.ClientSession(connector=connector) as session:
+        tasks = [asyncio.ensure_future(get_title(session, "http://www.lishiju.net/hotevents/p{}".format(i))) for i in
+                 range(111)]
+        await asyncio.wait(tasks)
+
+
+
+if __name__ == '__main__':
+    start_time = time.perf_counter()
+    asyncio.run(main())
+    print("Code run time:", time.perf_counter() - start_time)
diff --git "a/NO41/\346\231\256\351\200\232\345\244\232\347\272\277\347\250\213.py" "b/NO41/\346\231\256\351\200\232\345\244\232\347\272\277\347\250\213.py"
new file mode 100644
index 0000000..6ad3027
--- /dev/null
+++ "b/NO41/\346\231\256\351\200\232\345\244\232\347\272\277\347\250\213.py"
@@ -0,0 +1,38 @@
+import threading
+import time
+
+import requests
+from bs4 import BeautifulSoup
+
+
+class MyThread(threading.Thread):
+    def __init__(self, url):
+        threading.Thread.__init__(self)
+        self.__url = url
+
+    def run(self):
+
+        print("Crawling:", self.__url)
+        res = requests.get(url=self.__url)
+        soup = BeautifulSoup(res.text, 'html.parser')
+        title_tags = soup.find_all(attrs={'class': 'item-title'})
+        event_names = [item.a.text for item in title_tags]
+        print(event_names)
+        print("")
+
+
+
+if __name__ == "__main__":
+
+    start_time = time.perf_counter()
+    threads = []
+    for i in range(111):  # create 111 threads (pages 0 through 110)
+        threads.append(MyThread(url="http://www.lishiju.net/hotevents/p{}".format(i)))
+    for t in threads:
+        t.start()  # start all 111 threads
+
+    for t in threads:
+        t.join()  # wait for every thread to finish
+
+    print("Total time elapsed:", time.perf_counter() - start_time)
+    # Total time elapsed: 1.537718624
diff --git a/README.md b/README.md
index 214f717..169ec4e 100644
--- a/README.md
+++ b/README.md
@@ -79,11 +79,11 @@
 37. [Must-know for Python crawler fans, "coroutine crawlers": see how to collect avatar images with gevent](https://dream.blog.csdn.net/article/details/120421824)
 38. [Still can't master Python coroutines? Impossible! Learn coroutines while collecting Coser images](https://dream.blog.csdn.net/article/details/120445004)
-39. Zhongshao picture-book MP4 video crawl, asyncio coroutines part 3
-40. Bensound MP3 crawl, asyncio + aiohttp coroutines part 4
-41. Historical-drama site crawl, coroutine concurrency control
-
+39. [Have you become a "dad programmer" yet? Use Python to download 200+ picture-book animations for your kid, coroutines round 3](https://dream.blog.csdn.net/article/details/120463479)
+40. [Python coroutines lesson 4: the target data is MP3, the target site is bensound.com](https://dream.blog.csdn.net/article/details/120507981)
+41. [One more Python coroutine knowledge point: limiting concurrency, a must-have data collection skill](https://dream.blog.csdn.net/article/details/120879805)
+### 📘 scrapy library study
--
GitLab
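
A note on combining the techniques in this patch: the asyncio Semaphore script throttles concurrency but opens a fresh connection for every request through aiohttp.request(), while the TCPConnector script reuses connections but throttles only at the connector. Below is a minimal sketch of the combined pattern, one shared ClientSession plus an asyncio.Semaphore. The limit of 10, the lishiju.net URL template, and the item-title class are taken from the patch; the helper name fetch_titles and the switch from asyncio.wait to asyncio.gather are illustrative assumptions, not part of the patch.

    import asyncio
    import time

    import aiohttp
    from bs4 import BeautifulSoup


    async def fetch_titles(semaphore, session, url):
        # Hypothetical helper: the semaphore caps in-flight requests,
        # while the shared session reuses TCP connections between them.
        async with semaphore:
            async with session.get(url) as res:
                html = await res.text()
        # Parse outside the semaphore block so a slot is freed as soon
        # as the response body has been read.
        soup = BeautifulSoup(html, 'html.parser')
        title_tags = soup.find_all(attrs={'class': 'item-title'})
        return [item.a.text for item in title_tags]


    async def main():
        semaphore = asyncio.Semaphore(10)  # at most 10 requests in flight
        async with aiohttp.ClientSession() as session:
            tasks = [fetch_titles(semaphore, session,
                                  "http://www.lishiju.net/hotevents/p{}".format(i))
                     for i in range(111)]
            results = await asyncio.gather(*tasks)
        print("Pages collected:", len(results))


    if __name__ == '__main__':
        start_time = time.perf_counter()
        asyncio.run(main())
        print("Code run time:", time.perf_counter() - start_time)

Unlike asyncio.wait, asyncio.gather returns results in the order the coroutines were submitted, so the per-page title lists can be consumed directly without unwrapping Task objects.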