提交 cd3eedc5 编写于 作者: ToTensor's avatar ToTensor

extract success

上级 1ac1d443
...@@ -3,10 +3,13 @@ from src.ebook.community import send_topic ...@@ -3,10 +3,13 @@ from src.ebook.community import send_topic
if __name__ == "__main__": if __name__ == "__main__":
book_mapping = { book_mapping = {
"前端体验设计": "c4eeb42b07f54b42a9fd1568b8ec4b98", "前端体验设计——HTML5+CSS3终极修炼": "c4eeb42b07f54b42a9fd1568b8ec4b98",
} }
for key in book_mapping.keys():
extract_code(book_mapping) extract_code(book_mapping)
web_url = 'https://gitcode.net/csdn/content/book_id_c4eeb42b07f54b42a9fd1568b8ec4b98/-/tree/master/' web_url = 'https://gitcode.net/csdn/content/book_id_{}/-/tree/master/'.format(
book_mapping[key])
print('-------' * 20) print('-------' * 20)
print('开始向社区发帖') print('开始向社区发帖')
# send_topic(web_url) book_dir = 'data/{}/'.format(key)
\ No newline at end of file # send_topic(web_url, book_dir)
\ No newline at end of file
...@@ -65,9 +65,8 @@ def post(url, params, retry=3, headers=None): ...@@ -65,9 +65,8 @@ def post(url, params, retry=3, headers=None):
raise error raise error
def send_topic(web_url): def send_topic(web_url, book_dir):
data_dir = 'data' data_dir = 'data'
book_dir = 'data/深入剖析Nginx/'
# web_url = "https://codechina.csdn.net/csdn/book_code_c798a5992a654857867ec15660e1c32a/-/blob/master/" # web_url = "https://codechina.csdn.net/csdn/book_code_c798a5992a654857867ec15660e1c32a/-/blob/master/"
request_url = 'http://ccloud.internal.csdn.net/v1/internal/community/content/sendTopic' request_url = 'http://ccloud.internal.csdn.net/v1/internal/community/content/sendTopic'
......
...@@ -57,7 +57,6 @@ def extract_code(book_mapping): ...@@ -57,7 +57,6 @@ def extract_code(book_mapping):
chapter_content = html.unescape(chapter_content) chapter_content = html.unescape(chapter_content)
# print(chapter_content) # print(chapter_content)
if book_name == "前端体验设计":
section_list = re.findall(r'<h2.*?>(.*?)</h2>', section_list = re.findall(r'<h2.*?>(.*?)</h2>',
chapter_content, chapter_content,
flags=re.S) flags=re.S)
...@@ -71,8 +70,8 @@ def extract_code(book_mapping): ...@@ -71,8 +70,8 @@ def extract_code(book_mapping):
section = section.replace(' ', ' ') section = section.replace(' ', ' ')
if section.find(r'/') != -1: if section.find(r'/') != -1:
section = section.replace('/', '') section = section.replace('/', '')
section_dir = os.path.join( section_dir = os.path.join(chapter_dir,
chapter_dir, '{}.{}'.format(idx + 1, section)) '{}.{}'.format(idx + 1, section))
print(section_dir) print(section_dir)
if not os.path.exists(section_dir): if not os.path.exists(section_dir):
os.mkdir(section_dir) os.mkdir(section_dir)
...@@ -81,13 +80,13 @@ def extract_code(book_mapping): ...@@ -81,13 +80,13 @@ def extract_code(book_mapping):
if idx == 0: if idx == 0:
html_save_path = os.path.join(chapter_dir, 'text.html') html_save_path = os.path.join(chapter_dir, 'text.html')
else: else:
html_save_path = os.path.join( html_save_path = os.path.join(section_dir_list[idx - 1],
section_dir_list[idx - 1], 'text.html') 'text.html')
# with open(html_save_path, 'w', encoding='utf-8') as f: # with open(html_save_path, 'w', encoding='utf-8') as f:
# f.write(section_content) # f.write(section_content)
code_list = re.findall(r'<code>(.*?)</code>', code_list = re.findall(r'<code>(.*?)</code>', section_content,
section_content, re.S) re.S)
res_codelist = [] res_codelist = []
for code in code_list: for code in code_list:
...@@ -122,8 +121,8 @@ def extract_code(book_mapping): ...@@ -122,8 +121,8 @@ def extract_code(book_mapping):
# print(save_file_name) # print(save_file_name)
if idx == 0: if idx == 0:
code_save_path = os.path.join( code_save_path = os.path.join(chapter_dir,
chapter_dir, 'code_0.css') 'code_0.css')
else: else:
count += 1 count += 1
code_save_path = os.path.join( code_save_path = os.path.join(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册