Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
zylyyyyyy
Python_692244
提交
ef5bfca9
P
Python_692244
项目概览
zylyyyyyy
/
Python_692244
与 Fork 源项目一致
Fork自
inscode / Python
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Python_692244
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
ef5bfca9
编写于
6月 28, 2025
作者:
R
root
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Sat Jun 28 16:05:00 CST 2025 inscode
上级
0a575cc3
变更
4
展开全部
隐藏空白更改
内联
并排
Showing
4 changed file
with
93 addition
and
17 deletion
+93
-17
.inscode
.inscode
+4
-16
debug.html
debug.html
+1
-0
main.py
main.py
+85
-1
requirements.txt
requirements.txt
+3
-0
未找到文件。
.inscode
浏览文件 @
ef5bfca9
run = "pip install -r requirements.txt;python main.py"
run = "pip install -r requirements.txt && python main.py"
language = "python"
is_gui = false
is_resident = true
[packager]
is_html = false
AUTO_PIP = true
[env]
VIRTUAL_ENV = "/root/${PROJECT_DIR}/venv"
PATH = "${VIRTUAL_ENV}/bin:${PATH}"
PYTHONPATH = "$PYTHONHOME/lib/python3.10:${VIRTUAL_ENV}/lib/python3.10/site-packages"
REPLIT_POETRY_PYPI_REPOSITORY = "http://mirrors.csdn.net.cn/repository/csdn-pypi-mirrors/simple"
MPLBACKEND = "TkAgg"
POETRY_CACHE_DIR = "/root/${PROJECT_DIR}/.cache/pypoetry"
[debugger]
program = "main.py"
debug.html
0 → 100644
浏览文件 @
ef5bfca9
此差异已折叠。
点击以展开。
main.py
浏览文件 @
ef5bfca9
print
(
'欢迎来到 InsCode'
)
import
requests
\ No newline at end of file
from
lxml
import
etree
import
re
import
pymysql
from
time
import
sleep
from
concurrent.futures
import
ThreadPoolExecutor
def get_conn(host="127.0.0.1", user="root", password="root",
             database="novels", charset="utf8"):
    """Open a MySQL connection and return it together with a new cursor.

    The defaults reproduce the settings that used to be hard-coded, so a
    bare ``get_conn()`` call behaves as before. Pass keyword arguments to
    point at a different server, account or schema.

    Args:
        host: MySQL server host name or IP address.
        user: Account name used to log in.
        password: Password for ``user``.
        database: Schema selected after connecting.
        charset: Character set requested for the connection.

    Returns:
        A ``(conn, cursor)`` tuple. The caller owns both objects and must
        close them when finished.

    Raises:
        Any exception raised by ``pymysql.connect`` is propagated.
    """
    # NOTE(review): the default credentials are development values that live
    # in source control; supply real ones through the parameters.
    conn = pymysql.connect(
        host=host,
        user=user,
        password=password,
        # ``database`` is the documented keyword; ``db`` is only kept by
        # PyMySQL as a deprecated alias.
        database=database,
        charset=charset,
    )
    cursor = conn.cursor()
    return conn, cursor
def close_conn(conn, cursor):
    """Close ``cursor`` and then ``conn``, accepting ``None`` for either.

    Callers invoke this from a ``finally`` clause where the connection may
    never have been opened, so ``None`` arguments are skipped instead of
    raising ``AttributeError``.

    Args:
        conn: An object with a ``close()`` method, or ``None``.
        cursor: An object with a ``close()`` method, or ``None``.

    Raises:
        Whatever ``cursor.close()`` or ``conn.close()`` raises. The
        connection is still closed when closing the cursor fails.
    """
    try:
        if cursor is not None:
            cursor.close()
    finally:
        # Runs even if closing the cursor raised, so the connection is
        # never leaked.
        if conn is not None:
            conn.close()
def get_xpath_resp(url, timeout=10, debug_path="debug.html"):
    """Download ``url`` and parse the response body as HTML.

    The request is sent with a desktop Chrome ``User-Agent`` header. The
    status code and body length are printed, and the body is optionally
    dumped to a file so the fetched markup can be inspected.

    Args:
        url: Address to fetch with an HTTP GET.
        timeout: Value passed to ``requests.get`` as ``timeout``.
        debug_path: File that receives a copy of the response text, or
            ``None`` to skip the dump. The file is overwritten on every
            call.

    Returns:
        ``(tree, resp)`` where ``tree`` is the result of ``etree.HTML`` and
        ``resp`` is the ``requests`` response. ``(None, None)`` is returned
        when the request or the parsing raises.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'
    }
    try:
        resp = requests.get(url, headers=headers, timeout=timeout)
        print(f"响应状态码:{resp.status_code}")
        print(f"网页内容长度:{len(resp.text)}")
        if debug_path is not None:
            try:
                with open(debug_path, "w", encoding="utf-8") as dump_file:
                    dump_file.write(resp.text)
            except OSError as dump_error:
                # The dump is only a debugging aid: failing to write it must
                # not throw away a response that was fetched successfully.
                print(f"调试文件写入失败:{dump_error}")
        tree = etree.HTML(resp.text)
        return tree, resp
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and let the
        # caller decide what to do with the (None, None) result.
        print(f"请求失败:{e}")
        return None, None
def get_chapters(url, limit=15):
    """Collect chapter titles and links from a novel's index page.

    Args:
        url: Address of the index page. Chapter links are resolved
            relative to it.
        limit: Maximum number of ``<dd>`` entries to inspect, counted from
            the top of the list. ``None`` inspects all of them.

    Returns:
        ``(title_list, link_list, novel_name)``. The two lists are aligned
        by position. ``novel_name`` falls back to ``"未知小说"`` when the
        page has no matching heading. If the page could not be fetched,
        both lists are empty.
    """
    # Imported locally so this function does not rely on a module-level
    # import that the file does not have.
    from urllib.parse import urljoin

    tree, _ = get_xpath_resp(url)
    if tree is None:
        # The fetch helper signals failure with None; there is nothing to
        # parse, so report "no chapters" instead of crashing on .xpath().
        return [], [], "未知小说"

    # Novel title.
    name_nodes = tree.xpath('//*[@id="info"]/h1/text()')
    novel_name = name_nodes[0] if name_nodes else "未知小说"

    # Chapter entries: try the class-based list first, then fall back to
    # the "listmain" container.
    dds = (
        tree.xpath('//dl[contains(@class,"chapterlist")]/dd')
        or tree.xpath('//div[@class="listmain"]//dd')
    )

    title_list = []
    link_list = []
    for d in dds[:limit]:
        titles = d.xpath('./a/text()')
        hrefs = d.xpath('./a/@href')
        if not titles or not hrefs:
            # Entry without a usable link (e.g. an empty placeholder <dd>).
            continue
        title_list.append(titles[0])
        # urljoin handles relative, root-relative and absolute hrefs alike;
        # plain concatenation is only correct for the first kind.
        link_list.append(urljoin(url, hrefs[0]))
    return title_list, link_list, novel_name
def get_content(novel_name, title, url):
    """Download one chapter, clean its text and store it in the database.

    The chapter page is fetched first; the database connection is opened
    only once there is text to store. Failures are reported with ``print``
    and never propagate, so one bad chapter does not stop the others.
    The function always pauses for two seconds before returning.

    Args:
        novel_name: Value stored in the ``novel_name`` column.
        title: Chapter title, stored in the ``chapter_name`` column.
        url: Address of the chapter page.

    Returns:
        None.
    """
    # SQL used to insert one chapter row.
    sql = 'INSERT INTO novel(novel_name,chapter_name,content) VALUES(%s,%s,%s)'
    conn = None
    cursor = None
    try:
        _, resp = get_xpath_resp(url)
        if resp is None:
            # The fetch helper has already printed the reason.
            return

        # re.S lets "." span line breaks so multi-line chapter bodies match.
        matches = re.findall(r'<div id="content">(.*?)</div>', resp.text, re.S)
        if not matches:
            print(f"未找到正文内容:{title}")
            return

        # Clean the raw markup.
        # NOTE(review): the extracted source shows a whitespace-to-space
        # replacement here; presumably it strips non-breaking spaces, so
        # both the entity and the literal character are normalised. Confirm
        # against the original file.
        content = (
            matches[0]
            .replace('<br />', '\n')
            .replace('&nbsp;', ' ')
            .replace('\xa0', ' ')
            .replace('全本小说网 www.qb5.tw,最快更新<a href="https://www.qb5.tw/book_116659/">宇宙职业选手</a>最新章节!<br><br>', '')
        )
        print(title, content)

        conn, cursor = get_conn()
        cursor.execute(sql, [novel_name, title, content])  # insert the row
        conn.commit()  # commit so the row is persisted
    except Exception as e:
        # Report instead of silently swallowing, but keep the worker alive.
        print(f"章节保存失败:{title},原因:{e}")
    finally:
        sleep(2)  # pause between chapter downloads
        if conn is not None:
            # Only close what was actually opened.
            close_conn(conn, cursor)
if __name__ == '__main__':
    # Fetch the novel name plus the aligned chapter titles and links.
    title_list, link_list, novel_name = get_chapters('https://www.qb5.tw/book_116659/')
    if not title_list:
        print("未获取到任何章节")

    # Download chapters on a pool of 5 worker threads.
    with ThreadPoolExecutor(5) as t:
        futures = {
            t.submit(get_content, novel_name, title, link): title
            for title, link in zip(title_list, link_list)
        }

    # Leaving the ``with`` block waits for every task, so each future is
    # finished here. Inspect them so an exception raised inside a worker is
    # reported instead of being dropped.
    for future, title in futures.items():
        error = future.exception()
        if error is not None:
            print(f"章节处理失败:{title},原因:{error}")
\ No newline at end of file
requirements.txt
浏览文件 @
ef5bfca9
requests
==2.31.0
lxml
==4.9.4
pymysql
==1.1.0
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录