提交 cd3eedc5 编写于 作者: ToTensor

extract success

上级 1ac1d443
......@@ -3,10 +3,13 @@ from src.ebook.community import send_topic
if __name__ == "__main__":
book_mapping = {
"前端体验设计": "c4eeb42b07f54b42a9fd1568b8ec4b98",
"前端体验设计——HTML5+CSS3终极修炼": "c4eeb42b07f54b42a9fd1568b8ec4b98",
}
for key in book_mapping.keys():
extract_code(book_mapping)
web_url = 'https://gitcode.net/csdn/content/book_id_c4eeb42b07f54b42a9fd1568b8ec4b98/-/tree/master/'
web_url = 'https://gitcode.net/csdn/content/book_id_{}/-/tree/master/'.format(
book_mapping[key])
print('-------' * 20)
print('开始向社区发帖')
# send_topic(web_url)
\ No newline at end of file
book_dir = 'data/{}/'.format(key)
# send_topic(web_url, book_dir)
\ No newline at end of file
......@@ -65,9 +65,8 @@ def post(url, params, retry=3, headers=None):
raise error
def send_topic(web_url):
def send_topic(web_url, book_dir):
data_dir = 'data'
book_dir = 'data/深入剖析Nginx/'
# web_url = "https://codechina.csdn.net/csdn/book_code_c798a5992a654857867ec15660e1c32a/-/blob/master/"
request_url = 'http://ccloud.internal.csdn.net/v1/internal/community/content/sendTopic'
......
......@@ -57,7 +57,6 @@ def extract_code(book_mapping):
chapter_content = html.unescape(chapter_content)
# print(chapter_content)
if book_name == "前端体验设计":
section_list = re.findall(r'<h2.*?>(.*?)</h2>',
chapter_content,
flags=re.S)
......@@ -71,8 +70,8 @@ def extract_code(book_mapping):
section = section.replace(' ', ' ')
if section.find(r'/') != -1:
section = section.replace('/', '')
section_dir = os.path.join(
chapter_dir, '{}.{}'.format(idx + 1, section))
section_dir = os.path.join(chapter_dir,
'{}.{}'.format(idx + 1, section))
print(section_dir)
if not os.path.exists(section_dir):
os.mkdir(section_dir)
......@@ -81,13 +80,13 @@ def extract_code(book_mapping):
if idx == 0:
html_save_path = os.path.join(chapter_dir, 'text.html')
else:
html_save_path = os.path.join(
section_dir_list[idx - 1], 'text.html')
html_save_path = os.path.join(section_dir_list[idx - 1],
'text.html')
# with open(html_save_path, 'w', encoding='utf-8') as f:
# f.write(section_content)
code_list = re.findall(r'<code>(.*?)</code>',
section_content, re.S)
code_list = re.findall(r'<code>(.*?)</code>', section_content,
re.S)
res_codelist = []
for code in code_list:
......@@ -122,8 +121,8 @@ def extract_code(book_mapping):
# print(save_file_name)
if idx == 0:
code_save_path = os.path.join(
chapter_dir, 'code_0.css')
code_save_path = os.path.join(chapter_dir,
'code_0.css')
else:
count += 1
code_save_path = os.path.join(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册