Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
CSDN 技术社区
content
全程软件测试(第3版)
提交
a62f993b
全
全程软件测试(第3版)
项目概览
CSDN 技术社区
/
content
/
全程软件测试(第3版)
通知
8
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
全
全程软件测试(第3版)
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
a62f993b
编写于
12月 27, 2021
作者:
ToTensor
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
modify ignore
上级
8fba5e23
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
4 addition
and
313 deletion
+4
-313
.gitignore
.gitignore
+4
-1
data/全程软件测试(第3版).json
data/全程软件测试(第3版).json
+0
-9
main.py
main.py
+0
-9
src/ebook/community.py
src/ebook/community.py
+0
-109
src/ebook/ebook_get_request.py
src/ebook/ebook_get_request.py
+0
-44
src/ebook/extract_book_code.py
src/ebook/extract_book_code.py
+0
-123
src/ebook/get_book_chapter_id_list.py
src/ebook/get_book_chapter_id_list.py
+0
-18
未找到文件。
.gitignore
浏览文件 @
a62f993b
__pycache__
\ No newline at end of file
__pycache__
src
main.py
data/全程软件测试(第3版).json
\ No newline at end of file
data/全程软件测试(第3版).json
已删除
100644 → 0
浏览文件 @
8fba5e23
{
"data/全程软件测试(第3版)/第02章 全程测试:闪光的思想/2.2 测试驱动开发/code_1.java"
:
"https://bbs.csdn.net/topics/603878157"
,
"data/全程软件测试(第3版)/第03章 准备:基础设施与TA框架/3.4 自动化测试框架/code_3.java"
:
"https://bbs.csdn.net/topics/603878239"
,
"data/全程软件测试(第3版)/第03章 准备:基础设施与TA框架/3.4 自动化测试框架/code_2.java"
:
"https://bbs.csdn.net/topics/603878240"
,
"data/全程软件测试(第3版)/第03章 准备:基础设施与TA框架/3.4 自动化测试框架/code_5.java"
:
"https://bbs.csdn.net/topics/603878090"
,
"data/全程软件测试(第3版)/第03章 准备:基础设施与TA框架/3.4 自动化测试框架/code_4.java"
:
"https://bbs.csdn.net/topics/603878158"
,
"data/全程软件测试(第3版)/第03章 准备:基础设施与TA框架/3.4 自动化测试框架/code_1.java"
:
"https://bbs.csdn.net/topics/603878241"
}
\ No newline at end of file
main.py
已删除
100644 → 0
浏览文件 @
8fba5e23
from
src.ebook.extract_book_code
import
extract_code
from
src.ebook.community
import
send_topic
if __name__ == "__main__":
    # First extract the book's code listings, then publish them as topics.
    extract_code()

    # URL prefix handed to send_topic(), which prepends it to each file path.
    web_url = 'https://gitcode.net/csdn/content/book_code_825acb73c85c4c4bb9632afe858bc097/-/tree/master/'

    # Separator line followed by the "start posting" banner, one per line.
    print('-------' * 20, '开始向社区发帖', sep='\n')
    send_topic(web_url)
\ No newline at end of file
src/ebook/community.py
已删除
100644 → 0
浏览文件 @
8fba5e23
import
os
import
json
import
html
import
requests
import
logging
logger
=
logging
.
getLogger
(
__name__
)
def get_files_path(file_dir, filetype='.txt'):
    """Collect the paths of all files under a directory tree, filtered by extension.

    Args:
        file_dir: Root directory, walked recursively with ``os.walk``.
        filetype: Extension to keep, including the leading dot (default
            ``'.txt'``). Pass ``None`` to keep every file.

    Returns:
        A list of ``os.path.join(root, name)`` paths in ``os.walk`` order.
        The paths are absolute only when ``file_dir`` itself is absolute.
    """
    def wanted(name):
        # Either no filter was requested, or the extension matches exactly.
        return filetype is None or os.path.splitext(name)[1] == filetype

    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(file_dir)
        for name in names
        if wanted(name)
    ]
def post(url, params, retry=3, headers=None):
    """POST ``params`` to ``url``, retrying up to ``retry`` times.

    Args:
        url: Target URL.
        params: Request payload. When ``headers`` is None it is serialized
            with ``json.dumps`` and sent with a JSON content type; otherwise
            it is passed to ``requests.post`` unchanged.
        retry: Maximum number of attempts.
        headers: Optional request headers. When given, the caller is
            responsible for encoding ``params`` to match them.

    Returns:
        The decoded JSON body of the first truthy response. None when the
        attempts run out on a falsy (error-status) response, or when
        ``retry`` is not positive.

    Raises:
        Exception: Whatever the final attempt raised (connection error,
            timeout, invalid JSON body, ...), once ``retry`` attempts have
            failed.
    """
    if headers is None:
        hdrs = {"Content-Type": "application/json"}
    else:
        hdrs = headers

    fails = 0
    while fails < retry:
        try:
            if headers is None:
                data = json.dumps(params)
            else:
                data = params
            logger.debug(f"will post {data} to {url}")
            resp = requests.post(url, data, headers=hdrs, timeout=10)
            # A requests Response is truthy only for non-error status codes.
            if resp:
                logger.info(f"resp {resp.content}")
                return resp.json()
            else:
                logger.error(f"resp: [{resp}]")
                fails += 1
        except Exception as error:
            logger.error(f"post {params} to {url} failed {error}")
            fails += 1
            # The original tested `fails > retry`, which can never hold inside
            # this loop, so the last error was silently dropped. Re-raise it
            # once the attempts are exhausted.
            if fails >= retry:
                raise
def send_topic(web_url):
    """Create or update one community topic per extracted ``.java`` file.

    Every ``.java`` file found under the book directory becomes a topic whose
    body is an HTML link pointing at ``web_url + <file path>``. The topic URL
    returned by the service is stored in a JSON mapping file (file path ->
    topic URL), which is rewritten after each new topic. Files already in the
    mapping are posted again with the existing topic ``id`` (the last path
    segment of the stored URL) instead of creating a new topic.

    Args:
        web_url: URL prefix concatenated with each local file path to form the
            link target.
    """
    book_dir = 'data/全程软件测试(第3版)/'
    mapping_path = 'data/全程软件测试(第3版).json'
    # web_url = "https://codechina.csdn.net/csdn/book_code_c798a5992a654857867ec15660e1c32a/-/blob/master/"
    request_url = 'http://ccloud.internal.csdn.net/v1/internal/community/content/sendTopic'

    def save_mapping(mapping):
        # ensure_ascii=False writes the non-ASCII paths verbatim, so pin the
        # file encoding instead of relying on the platform default.
        with open(mapping_path, 'w', encoding='utf-8') as f:
            f.write(json.dumps(mapping, ensure_ascii=False, indent=2))

    files = get_files_path('data/全程软件测试(第3版)', '.java')

    # Start from an empty mapping file on the first run.
    if not os.path.exists(mapping_path):
        save_mapping({})
    with open(mapping_path, 'r', encoding='utf-8') as f:
        chapter_code_mapping = json.load(f)

    for file in files:
        # Title = path relative to the book directory, with '/' -> '|' and
        # ' ' -> '.'.
        topic_title = file.replace(book_dir, '').replace('/', '|').replace(' ', '.')
        # topic_title = html.escape(topic_title)
        topic_content = "代码:<a href=\"{}\">{}</a>".format(web_url + file, topic_title)
        print(topic_title)

        send_topic_request_param = {
            "type": "long_text",
            "cateId": 20965,
            "content": topic_content,
            "topicTitle": topic_title,
            "mdContent": topic_content,
            "communityId": 3822,
            "loginUserName": "BBS_Assistant",
            "bizNo": "ebook",
        }

        topic_link = chapter_code_mapping.get(file)
        if topic_link is None:
            # New file: create the topic and remember its URL.
            # NOTE(review): post() can return None after failed attempts, in
            # which case the subscript below raises TypeError.
            resp = post(request_url, send_topic_request_param)
            topic_link = resp['data']['content']['url']
            chapter_code_mapping[file] = topic_link
            print('{}:{}'.format(file, topic_link))
            # Persist after every new topic so an interrupted run can resume.
            save_mapping(chapter_code_mapping)
        else:
            # Known file: re-post with the existing topic id.
            send_topic_request_param['id'] = int(topic_link.split('/')[-1])
            post(request_url, send_topic_request_param)
            print('{}:{}'.format(file, topic_link))
src/ebook/ebook_get_request.py
已删除
100644 → 0
浏览文件 @
8fba5e23
import
json
import
requests
import
logging
logger
=
logging
.
getLogger
(
__name__
)
def _get_ebook_data(url, params):
    """GET ``url`` with ``params`` and return the ``data`` field of the JSON reply.

    Returns an empty dict when the response status code is not 200.
    """
    # NOTE(review): this session cookie is hard-coded in source; consider
    # loading it from configuration or the environment instead.
    headers = {
        "Cookie": "UserToken=149ba8a7a8d341bbbe41f904c4c9b176;UserName=xiuxiuyayayy"
    }
    # A timeout keeps an unresponsive server from hanging the run forever.
    result = requests.get(url=url, params=params, headers=headers, timeout=10)
    if result.status_code != 200:
        logger.error('request failed!!!!!')
        return {}
    ret = json.loads(result.text)
    logger.info('request success')
    return ret['data']


def get_chapter_content(params):
    """Fetch one chapter from the ebook service.

    Args:
        params: Query parameters sent with the GET request.

    Returns:
        The ``data`` field of the JSON response, or ``{}`` when the status
        code is not 200.
    """
    return _get_ebook_data('http://192.168.50.117:9003/v1/chapter/content', params)


def get_chapter_list(params):
    """Fetch the chapter list from the ebook service.

    Args:
        params: Query parameters sent with the GET request.

    Returns:
        The ``data`` field of the JSON response, or ``{}`` when the status
        code is not 200.
    """
    return _get_ebook_data('http://192.168.50.117:9003/inner/v1/chapter/list', params)
\ No newline at end of file
src/ebook/extract_book_code.py
已删除
100644 → 0
浏览文件 @
8fba5e23
import
json
import
os
import
re
import
html
from
bs4
import
BeautifulSoup
from
.get_book_chapter_id_list
import
get_chapter_id_list
from
.ebook_get_request
import
get_chapter_content
def extract_code():
    """Download every chapter of the configured book and save its code listings.

    For each chapter id from ``get_chapter_id_list`` the chapter is fetched
    with ``get_chapter_content`` and a directory ``data/<book>/<chapter>`` is
    created. The chapter HTML is split on ``<h2>`` headings, one
    sub-directory is created per heading, and the text of every multi-line
    ``<code>`` element is written to a ``code_N.java`` file in the directory
    of the section it appears in.
    """
    # book_mapping_path = "data/book_mapping.json"
    # with open(book_mapping_path, "r") as f:
    #     book_mapping = json.load(f)
    book_mapping = {
        "全程软件测试(第3版)": "825acb73c85c4c4bb9632afe858bc097",
    }
    for book_name, book_id in book_mapping.items():
        book_dir = os.path.join('data', book_name)
        # makedirs also creates a missing 'data' parent, where mkdir failed.
        os.makedirs(book_dir, exist_ok=True)
        print(book_name)

        request_get_chapter_id_list_params = {"bookId": book_id, "is_main": 1}
        chapter_id_list = get_chapter_id_list(request_get_chapter_id_list_params)
        print(chapter_id_list)

        for chapter_id in chapter_id_list:
            request_get_chapter_content_params = {
                'bookId': book_id,
                'chapterId': chapter_id,
            }
            chapter_resp = get_chapter_content(request_get_chapter_content_params)
            chapter_name = chapter_resp['name']
            chapter_content = chapter_resp['content']

            # Zero-pad the chapter number to two characters. Chapters without
            # a number keep their original name.
            try:
                if book_name == "零基础学机器学习":
                    chapter_num = re.findall(r'第(.*)课', chapter_name)[0]
                    chapter_name_modify = re.sub(
                        r'第(.*)课',
                        r'第{}课'.format(chapter_num.zfill(2)),
                        chapter_name)
                else:
                    chapter_num = re.findall(r'第(.*)章', chapter_name)[0]
                    chapter_name_modify = re.sub(
                        r'第(.*)章',
                        r'第{}章'.format(chapter_num.zfill(2)),
                        chapter_name)
                chapter_name = chapter_name_modify
            except (IndexError, re.error):
                # IndexError: the name has no chapter number.
                # re.error: the captured text is not a valid replacement.
                # print('该章节没有章节序号: {}'.format(chapter_name))
                pass

            chapter_dir = os.path.join(book_dir, chapter_name)
            os.makedirs(chapter_dir, exist_ok=True)
            # print('创建文件夹: {}'.format(chapter_dir))

            chapter_content = html.unescape(chapter_content)
            if book_name == "全程软件测试(第3版)":
                # NOTE(review): the two patterns differ (greedy vs. lazy
                # attribute match, and only the split uses re.S), so headings
                # and split parts can get out of step; the lookup below then
                # raises IndexError.
                section_list = re.findall(r'<h2.*>(.*?)</h2>', chapter_content)
                section_content_list = re.split(
                    r'<h2.*?>.*?</h2>', chapter_content, flags=re.S)

                section_dir_list = []
                for section in section_list:
                    # NOTE(review): both arguments appear as a plain space
                    # here, which makes this a no-op; presumably the first was
                    # a non-breaking or full-width space - confirm against the
                    # original file.
                    section = section.replace(' ', ' ')
                    # '/' would be read as a path separator, so drop it.
                    section = section.replace('/', '')
                    section_dir = os.path.join(chapter_dir, section)
                    # print(section_dir)
                    os.makedirs(section_dir, exist_ok=True)
                    section_dir_list.append(section_dir)

                for idx, section_content in enumerate(section_content_list):
                    # Part 0 is the text before the first <h2> and belongs to
                    # the chapter directory; part k belongs to heading k-1.
                    if idx == 0:
                        target_dir = chapter_dir
                    else:
                        target_dir = section_dir_list[idx - 1]

                    code_list = re.findall(
                        r'<code>(.*?)</code>', section_content, re.S)
                    count = 0
                    for snippet in code_list:
                        # Skip single-line (inline) code fragments.
                        if len(snippet.split('\n')) < 2:
                            continue
                        count += 1
                        snippet = html.unescape(snippet)
                        # NOTE(review): no parser is named, so bs4 picks
                        # whichever is installed; results may vary by machine.
                        soup = BeautifulSoup(snippet)
                        res_str = soup.get_text()
                        # NOTE(review): every snippet before the first heading
                        # is written to the same code_0.java, so later ones
                        # overwrite earlier ones.
                        if idx == 0:
                            file_name = 'code_0.java'
                        else:
                            file_name = 'code_{}.java'.format(count)
                        code_save_dir = os.path.join(target_dir, file_name)
                        print(code_save_dir)
                        print(res_str)
                        with open(code_save_dir, 'w', encoding='utf-8') as f:
                            f.write(res_str)
# clean_text_list = []
# for line in res_str.split('\n'):
# if line == '':
# continue
# if line[0].isdigit():
# line = re.findall(r'^[0-9]+ {0,2}(.*)',
# line)[0]
# # print(line)
# else:
# if line.startswith('>>'):
# break
# clean_text_list.append(line)
# clean_code = '\n'.join(clean_text_list)
# print(clean_code)
\ No newline at end of file
src/ebook/get_book_chapter_id_list.py
已删除
100644 → 0
浏览文件 @
8fba5e23
import
json
import
re
import
html
import
nltk
import
html2text
import
os
import
pandas
as
pd
from
bs4
import
BeautifulSoup
from
.ebook_get_request
import
get_chapter_list
def get_chapter_id_list(param):
    """Return the ``chapterid`` of every entry yielded by ``get_chapter_list``.

    Args:
        param: Query parameters forwarded unchanged to ``get_chapter_list``.

    Returns:
        A list of the ``'chapterid'`` values, in the order the entries arrive.
    """
    return [entry['chapterid'] for entry in get_chapter_list(param)]
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录