From b2148e6a37f767cefeb412b4766e477dfcae6387 Mon Sep 17 00:00:00 2001
From: chenlong <chenlong@csdn.net>
Date: Thu, 13 Jan 2022 18:36:44 +0800
Subject: [PATCH] add questions

---
 .../12.autoscraper/autoscraper_desc.json"     |  7 +++
 .../12.autoscraper/autoscraper_desc.md"       | 29 ++++++++++
 .../12.autoscraper/config.json"               |  9 +++
 .../12.autoscraper/hello_autoscraper.json"    |  7 +++
 .../12.autoscraper/hello_autoscraper.md"      | 46 +++++++++++++++
 .../12.autoscraper/hello_autoscraper.py"      | 12 ++++
 .../13.selectolax/config.json"                |  9 +++
 .../13.selectolax/hello_selectolax.json"      |  7 +++
 .../13.selectolax/hello_selectolax.md"        | 57 +++++++++++++++++++
 .../13.selectolax/hello_selectolax.py"        | 24 ++++++++
 .../13.selectolax/selectolax_desc.json"       |  7 +++
 .../13.selectolax/selectolax_desc.md"         | 29 ++++++++++
 .../14.requests-html/config.json"             |  9 +++
 .../hello_requests_html.json"                 |  7 +++
 .../14.requests-html/hello_requests_html.md"  | 45 +++++++++++++++
 .../14.requests-html/hello_requests_html.py"  | 11 ++++
 .../14.requests-html/requests_html_desc.json" |  7 +++
 .../14.requests-html/requests_html_desc.md"   | 29 ++++++++++
 18 files changed, 351 insertions(+)
 create mode 100644 "data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/12.autoscraper/autoscraper_desc.json"
 create mode 100644 "data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/12.autoscraper/autoscraper_desc.md"
 create mode 100644 "data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/12.autoscraper/config.json"
 create mode 100644 "data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/12.autoscraper/hello_autoscraper.json"
 create mode 100644 "data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/12.autoscraper/hello_autoscraper.md"
 create mode 100644 "data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/12.autoscraper/hello_autoscraper.py"
 create mode 100644 "data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/13.selectolax/config.json"
 create mode 100644 "data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/13.selectolax/hello_selectolax.json"
 create mode 100644 "data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/13.selectolax/hello_selectolax.md"
 create mode 100644 "data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/13.selectolax/hello_selectolax.py"
 create mode 100644 "data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/13.selectolax/selectolax_desc.json"
 create mode 100644 "data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/13.selectolax/selectolax_desc.md"
 create mode 100644 "data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/14.requests-html/config.json"
 create mode 100644 "data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/14.requests-html/hello_requests_html.json"
 create mode 100644 "data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/14.requests-html/hello_requests_html.md"
 create mode 100644 "data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/14.requests-html/hello_requests_html.py"
 create mode 100644 "data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/14.requests-html/requests_html_desc.json"
 create mode 100644 "data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/14.requests-html/requests_html_desc.md"

diff --git "a/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/12.autoscraper/autoscraper_desc.json" "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/12.autoscraper/autoscraper_desc.json"
new file mode 100644
index 0000000..06f8798
--- /dev/null
+++ "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/12.autoscraper/autoscraper_desc.json"
@@ -0,0 +1,7 @@
+{
+    "source": "autoscraper_desc.md",
+    "depends": [],
+    "type": "code_options",
+    "author": "zxm2015",
+    "notebook_enable": true
+}
\ No newline at end of file
diff --git "a/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/12.autoscraper/autoscraper_desc.md" "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/12.autoscraper/autoscraper_desc.md"
new file mode 100644
index 0000000..10701be
--- /dev/null
+++ "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/12.autoscraper/autoscraper_desc.md"
@@ -0,0 +1,29 @@
+# autoscraper简介
+
+autoscraper 是一个基于 Python 的智能、自动、快速和轻量级的网络爬虫，以下说法<span style="color:red">错误</span>的是：
+
+## 答案
+
+```bash
+是目前解析速度最快的网络爬虫
+```
+
+## 选项
+
+### A
+
+```bash
+同时提供了精确抽取的方法
+```
+
+### B
+
+```bash
+可以根据示例文本自动抽取相似的文本
+```
+
+### C
+
+```bash
+避免了手写页面抽取规则的烦恼
+```
diff --git "a/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/12.autoscraper/config.json" "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/12.autoscraper/config.json"
new file mode 100644
index 0000000..42d7c05
--- /dev/null
+++ "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/12.autoscraper/config.json"
@@ -0,0 +1,9 @@
+{
+    "export": ["autoscraper_desc.json", "hello_autoscraper.json"],
+    "keywords": [],
+    "children": [],
+    "keywords_must": [
+      "autoscraper"
+    ],
+    "keywords_forbid": []
+}
\ No newline at end of file
diff --git "a/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/12.autoscraper/hello_autoscraper.json" "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/12.autoscraper/hello_autoscraper.json"
new file mode 100644
index 0000000..855c990
--- /dev/null
+++ "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/12.autoscraper/hello_autoscraper.json"
@@ -0,0 +1,7 @@
+{
+    "source": "hello_autoscraper.md",
+    "depends": [],
+    "type": "code_options",
+    "author": "zxm2015",
+    "notebook_enable": true
+}
\ No newline at end of file
diff --git "a/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/12.autoscraper/hello_autoscraper.md" "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/12.autoscraper/hello_autoscraper.md"
new file mode 100644
index 0000000..7ac426d
--- /dev/null
+++ "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/12.autoscraper/hello_autoscraper.md"
@@ -0,0 +1,46 @@
+# autoscraper示例
+
+使用 autoscraper 从 Stack Overflow 搜索页提取相似的主题帖，代码如下：
+
+```python
+# -*- coding: UTF-8 -*-
+from autoscraper import AutoScraper
+
+def get_similar_result(url, wanted_list):
+    scraper = AutoScraper()
+    # TODO(You): 正确的提取代码
+    return result
+
+url = 'https://stackoverflow.com/search?q=autoscraper&s=7b5866da-920e-4926-8c33-09fb7d32886b'
+wanted_list = ["AutoScraper module not found in Python Autoscraper library"]
+print(get_similar_result(url, wanted_list))
+
+```
+
+关于缺失代码部分，以下选项<span style="color:red">正确</span>的是：
+
+## 答案
+
+```python
+result = scraper.build(url, wanted_list)
+```
+
+## 选项
+
+### A
+
+```python
+result = scraper.get_result_similar(url, wanted_list)
+```
+
+### B
+
+```python
+result = scraper.get(url, wanted_list)
+```
+
+### C
+
+```python
+result = scraper.get_result_exact(url, wanted_list)
+```
diff --git "a/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/12.autoscraper/hello_autoscraper.py" "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/12.autoscraper/hello_autoscraper.py"
new file mode 100644
index 0000000..ba8f24e
--- /dev/null
+++ "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/12.autoscraper/hello_autoscraper.py"
@@ -0,0 +1,12 @@
+# -*- coding: UTF-8 -*-
+# Runnable version of the snippet shown in hello_autoscraper.md.
+from autoscraper import AutoScraper
+# Returns whatever AutoScraper.build() yields for `url` and the samples in `wanted_list`.
+def get_similar_result(url, wanted_list):
+    scraper = AutoScraper()
+    result = scraper.build(url, wanted_list)  # presumably fetches the page and returns entries similar to the samples -- confirm against the autoscraper docs
+    return result
+# Demo inputs: a Stack Overflow search URL and one sample text (looks like a post title).
+url = 'https://stackoverflow.com/search?q=autoscraper&s=7b5866da-920e-4926-8c33-09fb7d32886b'
+wanted_list = ["AutoScraper module not found in Python Autoscraper library"]
+print(get_similar_result(url, wanted_list))
\ No newline at end of file
diff --git "a/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/13.selectolax/config.json" "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/13.selectolax/config.json"
new file mode 100644
index 0000000..1d44ced
--- /dev/null
+++ "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/13.selectolax/config.json"
@@ -0,0 +1,9 @@
+{
+    "export": ["selectolax_desc.json", "hello_selectolax.json"],
+    "keywords": [],
+    "children": [],
+    "keywords_must": [
+      "selectolax"
+    ],
+    "keywords_forbid": []
+}
\ No newline at end of file
diff --git "a/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/13.selectolax/hello_selectolax.json" "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/13.selectolax/hello_selectolax.json"
new file mode 100644
index 0000000..4c6179e
--- /dev/null
+++ "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/13.selectolax/hello_selectolax.json"
@@ -0,0 +1,7 @@
+{
+    "source": "hello_selectolax.md",
+    "depends": [],
+    "type": "code_options",
+    "author": "zxm2015",
+    "notebook_enable": true
+}
\ No newline at end of file
diff --git "a/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/13.selectolax/hello_selectolax.md" "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/13.selectolax/hello_selectolax.md"
new file mode 100644
index 0000000..f9c87b8
--- /dev/null
+++ "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/13.selectolax/hello_selectolax.md"
@@ -0,0 +1,57 @@
+# selectolax示例
+
+使用 selectolax 提取页面p标签的内容，代码如下：
+
+```python
+# -*- coding: UTF-8 -*-
+from selectolax.parser import HTMLParser
+
+def get_p(html):
+    p_list = []
+    for node in HTMLParser(html).css("p"):
+        # TODO(You): 正确的提取代码
+    return p_list
+
+html = '''
+    <html>
+        <head>
+            <title>这是一个简单的测试页面</title>
+        </head>
+        <body>
+            <p class="item-0">body 元素的内容会显示在浏览器中。</p>
+            <p class="item-1">title 元素的内容会显示在浏览器的标题栏中。</p>
+        </body>
+    </html>
+    '''
+
+print(get_p(html))
+
+```
+
+关于缺失代码部分，以下选项<span style="color:red">正确</span>的是：
+
+## 答案
+
+```python
+p_list.append(node.text())
+```
+
+## 选项
+
+### A
+
+```python
+p_list.append(node.text)
+```
+
+### B
+
+```python
+p_list.append(node)
+```
+
+### C
+
+```python
+p_list.append(node.get_text())
+```
diff --git "a/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/13.selectolax/hello_selectolax.py" "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/13.selectolax/hello_selectolax.py"
new file mode 100644
index 0000000..c55eed1
--- /dev/null
+++ "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/13.selectolax/hello_selectolax.py"
@@ -0,0 +1,24 @@
+# -*- coding: UTF-8 -*-
+# Runnable version of the snippet shown in hello_selectolax.md.
+from selectolax.parser import HTMLParser
+
+# Return a list holding node.text() for every node matched by the CSS selector "p".
+def get_p(html):
+    p_list = []
+    for node in HTMLParser(html).css("p"):  # iterate over the nodes selected by "p"
+        p_list.append(node.text())  # text is called as a method here, not read as an attribute
+    return p_list
+# Sample document whose body contains two <p> elements.
+html = '''
+    <html>
+        <head>
+            <title>这是一个简单的测试页面</title>
+        </head>
+        <body>
+            <p class="item-0">body 元素的内容会显示在浏览器中。</p>
+            <p class="item-1">title 元素的内容会显示在浏览器的标题栏中。</p>
+        </body>
+    </html>
+    '''
+# Print the list collected from the sample document.
+print(get_p(html))
diff --git "a/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/13.selectolax/selectolax_desc.json" "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/13.selectolax/selectolax_desc.json"
new file mode 100644
index 0000000..f05a79f
--- /dev/null
+++ "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/13.selectolax/selectolax_desc.json"
@@ -0,0 +1,7 @@
+{
+    "source": "selectolax_desc.md",
+    "depends": [],
+    "type": "code_options",
+    "author": "zxm2015",
+    "notebook_enable": true
+}
\ No newline at end of file
diff --git "a/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/13.selectolax/selectolax_desc.md" "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/13.selectolax/selectolax_desc.md"
new file mode 100644
index 0000000..0eb973d
--- /dev/null
+++ "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/13.selectolax/selectolax_desc.md"
@@ -0,0 +1,29 @@
+# selectolax简介
+
+selectolax用来高效解析网页，以下说法<span style="color:red">错误</span>的是：
+
+## 答案
+
+```bash
+selectolax提供了下载网页功能
+```
+
+## 选项
+
+### A
+
+```bash
+selectolax解析速度优于lxml
+```
+
+### B
+
+```bash
+爬取大量数据，解析页面可以考虑使用selectolax
+```
+
+### C
+
+```bash
+使用了Modest和Lexbor引擎
+```
diff --git "a/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/14.requests-html/config.json" "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/14.requests-html/config.json"
new file mode 100644
index 0000000..3f80850
--- /dev/null
+++ "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/14.requests-html/config.json"
@@ -0,0 +1,9 @@
+{
+    "export": ["requests_html_desc.json", "hello_requests_html.json"],
+    "keywords": [],
+    "children": [],
+    "keywords_must": [
+      "requests-html"
+    ],
+    "keywords_forbid": []
+}
\ No newline at end of file
diff --git "a/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/14.requests-html/hello_requests_html.json" "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/14.requests-html/hello_requests_html.json"
new file mode 100644
index 0000000..4fd4c51
--- /dev/null
+++ "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/14.requests-html/hello_requests_html.json"
@@ -0,0 +1,7 @@
+{
+    "source": "hello_requests_html.md",
+    "depends": [],
+    "type": "code_options",
+    "author": "zxm2015",
+    "notebook_enable": true
+}
\ No newline at end of file
diff --git "a/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/14.requests-html/hello_requests_html.md" "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/14.requests-html/hello_requests_html.md"
new file mode 100644
index 0000000..b8b91a2
--- /dev/null
+++ "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/14.requests-html/hello_requests_html.md"
@@ -0,0 +1,45 @@
+# requests-html示例
+
+使用 requests-html 提取页面 `https://www.baidu.com/` 上面的所有链接，代码如下：
+
+```python
+# -*- coding: UTF-8 -*-
+from requests_html import HTMLSession
+
+def get_url(url):
+    session = HTMLSession()
+    r = session.get(url)
+    # TODO(You): 正确的提取代码
+    return urls
+
+print(get_url("https://www.baidu.com/"))
+
+```
+
+关于缺失代码部分，以下选项<span style="color:red">正确</span>的是：
+
+## 答案
+
+```python
+urls = r.html.links
+```
+
+## 选项
+
+### A
+
+```python
+urls = r.html.find("url")
+```
+
+### B
+
+```python
+urls = r.html.find("url")[0]
+```
+
+### C
+
+```python
+urls = r.html.urls
+```
diff --git "a/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/14.requests-html/hello_requests_html.py" "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/14.requests-html/hello_requests_html.py"
new file mode 100644
index 0000000..10828f5
--- /dev/null
+++ "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/14.requests-html/hello_requests_html.py"
@@ -0,0 +1,11 @@
+# -*- coding: UTF-8 -*-
+# Runnable version of the snippet shown in hello_requests_html.md.
+from requests_html import HTMLSession
+
+# Request `url` through an HTMLSession and return the `links` attribute of the response's html.
+def get_url(url):
+    session = HTMLSession()  # NOTE(review): the session is never closed explicitly in this demo
+    r = session.get(url)  # issues the request for the given URL
+    return r.html.links  # per the accompanying quiz text, these are the links found on the page
+# Demo call against the Baidu home page.
+print(get_url("https://www.baidu.com/"))
\ No newline at end of file
diff --git "a/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/14.requests-html/requests_html_desc.json" "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/14.requests-html/requests_html_desc.json"
new file mode 100644
index 0000000..4b51197
--- /dev/null
+++ "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/14.requests-html/requests_html_desc.json"
@@ -0,0 +1,7 @@
+{
+    "source": "requests_html_desc.md",
+    "depends": [],
+    "type": "code_options",
+    "author": "zxm2015",
+    "notebook_enable": true
+}
\ No newline at end of file
diff --git "a/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/14.requests-html/requests_html_desc.md" "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/14.requests-html/requests_html_desc.md"
new file mode 100644
index 0000000..badac7e
--- /dev/null
+++ "b/data/2.python\344\270\255\351\230\266/3.\347\275\221\347\273\234\347\210\254\350\231\253/14.requests-html/requests_html_desc.md"
@@ -0,0 +1,29 @@
+# requests-html简介
+
+requests-html 可以使爬虫开发人员方便地编写爬虫代码，以下说法<span style="color:red">错误</span>的是：
+
+## 答案
+
+```bash
+支持验证码识别
+```
+
+## 选项
+
+### A
+
+```bash
+requests-html不仅可以下载网页，还可以解析网页
+```
+
+### B
+
+```bash
+支持CSS和XPath选择器
+```
+
+### C
+
+```bash
+支持持久cookie和代理
+```
-- 
GitLab