"""Post the code files extracted from an e-book as topics in a community.

The module walks the extracted book directory, posts one topic per code
file to the ``sendTopic`` endpoint, and records the returned topic URL
for each file in a JSON mapping so that later runs update the existing
topic instead of creating a new one.
"""
import os
import json
import html
import logging

logger = logging.getLogger(__name__)


def get_files_path(file_dir, filetype='.txt'):
    """Collect the paths of all files under a directory with a given suffix.

    Args:
        file_dir: Directory to walk recursively.
        filetype: File extension to keep, including the leading dot.
            Pass None to keep every file.

    Returns:
        A list of paths built by joining each walked directory with the
        file name. The paths are only absolute when ``file_dir`` is.
    """
    files_path = []
    for root, _dirs, files in os.walk(file_dir):
        for file in files:
            if filetype is None or os.path.splitext(file)[1] == filetype:
                files_path.append(os.path.join(root, file))
    return files_path


def post(url, params, retry=3, headers=None):
    """POST ``params`` to ``url``, retrying on failure.

    Args:
        url: Endpoint to post to.
        params: Request payload. It is serialized with ``json.dumps`` when
            ``headers`` is None; otherwise it is sent as given.
        retry: Maximum number of attempts.
        headers: Optional request headers. When None, a JSON
            ``Content-Type`` header is used.

    Returns:
        The decoded JSON body of the first truthy response. None when no
        attempt was made (``retry <= 0``) or when the final attempt got a
        falsy response.

    Raises:
        Exception: The error raised by the final attempt, once all
            attempts are used up.
    """
    # Imported here so that the rest of the module stays importable when
    # the HTTP client is not installed.
    import requests

    # The payload does not change between attempts, so build it once.
    if headers is None:
        hdrs = {"Content-Type": "application/json"}
        data = json.dumps(params)
    else:
        hdrs = headers
        data = params

    last_error = None
    for _attempt in range(retry):
        try:
            logger.debug("will post %s to %s", data, url)
            resp = requests.post(url, data, headers=hdrs, timeout=10)
            if resp:
                logger.info("resp %s", resp.content)
                return resp.json()
            logger.error("resp: [%s]", resp)
            last_error = None
        except Exception as error:
            logger.error("post %s to %s failed %s", params, url, error)
            # ``error`` is unbound when the except clause ends; keep a
            # reference so it can be re-raised after the last attempt.
            last_error = error

    if last_error is not None:
        raise last_error
    return None


def _save_mapping(mapping_path, mapping):
    """Write the file-to-topic-URL mapping as UTF-8 JSON."""
    # ensure_ascii=False emits raw non-ASCII text, so the encoding must
    # not be left to the platform default.
    with open(mapping_path, 'w', encoding='utf-8') as f:
        f.write(json.dumps(mapping, ensure_ascii=False, indent=2))


def _load_mapping(mapping_path):
    """Read the mapping, creating an empty mapping file if none exists."""
    if not os.path.exists(mapping_path):
        _save_mapping(mapping_path, {})
    with open(mapping_path, 'r', encoding='utf-8') as f:
        return json.load(f)


def send_topic(web_url):
    """Post one topic per ``.java`` file of the book and record its URL.

    Files already present in the mapping are re-posted with the ``id``
    taken from the last path segment of their saved topic URL. Files not
    yet in the mapping are posted without an ``id`` and the returned
    topic URL is saved immediately, so an interrupted run can resume.

    Args:
        web_url: URL prefix that is concatenated with each file path to
            form the link placed in the topic content.
    """
    book_dir = 'data/全程软件测试（第3版）/'
    request_url = 'http://ccloud.internal.csdn.net/v1/internal/community/content/sendTopic'
    mapping_path = 'data/全程软件测试（第3版）.json'

    files = get_files_path('data/全程软件测试（第3版）', '.java')
    chapter_code_mapping = _load_mapping(mapping_path)

    for file in files:
        # The title is the path below the book directory, with path
        # separators and spaces replaced.
        topic_title = file.replace(book_dir, '')
        topic_title = topic_title.replace('/', '|')
        topic_title = topic_title.replace(' ', '.')
        topic_content = "代码：{}".format(web_url + file)

        print(topic_title)

        send_topic_request_param = {
            "type": "long_text",
            "cateId": 20965,
            "content": topic_content,
            "topicTitle": topic_title,
            "mdContent": topic_content,
            "communityId": 3822,
            "loginUserName": "BBS_Assistant",
            "bizNo": "ebook"
        }

        saved_link = chapter_code_mapping.get(file)
        if saved_link is None:
            resp = post(request_url, send_topic_request_param)
            try:
                topic_link = resp['data']['content']['url']
            except (TypeError, KeyError):
                # post() returned None or a body without the expected
                # keys: leave the file unmapped so a later run retries it.
                logger.error("no topic url returned for %s: %s", file, resp)
                continue
            chapter_code_mapping[file] = topic_link
            print('{}:{}'.format(file, topic_link))
            _save_mapping(mapping_path, chapter_code_mapping)
        else:
            send_topic_request_param['id'] = int(saved_link.split('/')[-1])
            post(request_url, send_topic_request_param)
            print('{}:{}'.format(file, saved_link))


if __name__ == "__main__":
    # Driver statements carried over from the main.py part of the diff.
    from src.ebook.extract_book_code import extract_code

    extract_code()
    web_url = 'https://gitcode.net/csdn/content/book_code_825acb73c85c4c4bb9632afe858bc097/-/tree/master/'
    print('-------' * 20)
    print('开始向社区发帖')
    send_topic(web_url)