import os
import json
import html
import requests
import logging
logger = logging.getLogger(__name__)
def get_files_path(file_dir, filetype='.txt'):
    """Collect the paths of files under a directory tree, filtered by suffix.

    Args:
        file_dir: Root directory that is walked recursively.
        filetype: File extension to keep, including the leading dot
            (e.g. ``'.txt'``). ``None`` keeps every file.

    Returns:
        A list of paths, each built by joining the walked directory with
        the file name. The paths are absolute only when ``file_dir`` is.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(file_dir)
        for name in names
        if filetype is None or os.path.splitext(name)[1] == filetype
    ]
def get_all_files(current_address):
    """Return the path of every file found beneath ``current_address``.

    The tree is walked recursively; directories themselves are not listed.

    Args:
        current_address: Root directory to walk.

    Returns:
        A list of file paths, each formed by joining the containing
        directory (as yielded by ``os.walk``) with the file name.
    """
    collected = []
    for folder, _subdirs, names in os.walk(current_address):
        collected.extend(os.path.join(folder, name) for name in names)
    return collected
def post(url, params, retry=3, headers=None):
    """POST ``params`` to ``url``, retrying on failure, and return the JSON reply.

    Args:
        url: Target URL.
        params: Request payload. When ``headers`` is ``None`` it is
            serialized with ``json.dumps``; otherwise it is passed to
            ``requests.post`` unchanged as the ``data`` argument.
        retry: Maximum number of attempts.
        headers: Optional request headers. When ``None``, a JSON
            ``Content-Type`` header is used.

    Returns:
        The decoded JSON body of the first truthy response, or ``None``
        when every attempt produced a falsy (error-status) response or
        when ``retry`` is not positive.

    Raises:
        Exception: Whatever the final attempt raised (connection error,
            timeout, invalid JSON body, ...). Earlier failures are logged
            and retried.
        TypeError: If ``params`` cannot be JSON-serialized (raised before
            any request is made, since retrying cannot help).
    """
    if headers is None:
        hdrs = {"Content-Type": "application/json"}
        # The payload does not change between attempts, so serialize once.
        data = json.dumps(params)
    else:
        hdrs = headers
        data = params

    fails = 0
    while fails < retry:
        try:
            logger.debug("will post %s to %s", data, url)
            resp = requests.post(url, data, headers=hdrs, timeout=10)
            if resp:
                logger.info("resp %s", resp.content)
                return resp.json()
            logger.error("resp: [%s]", resp)
            fails += 1
        except Exception as error:
            logger.error("post %s to %s failed %s", params, url, error)
            fails += 1
            # The loop stops once fails == retry, so the previous
            # `fails > retry` check could never fire and the last error
            # was silently swallowed. Re-raise on the final attempt.
            if fails >= retry:
                raise
    return None
def _save_mapping(mapping_path, mapping):
    """Write the file -> topic URL mapping as indented, non-ASCII-escaped JSON."""
    # ensure_ascii=False emits raw non-ASCII characters, so the file must be
    # opened with an explicit UTF-8 encoding rather than the locale default.
    with open(mapping_path, 'w', encoding='utf-8') as f:
        f.write(json.dumps(mapping, ensure_ascii=False, indent=2))


def _load_mapping(mapping_path):
    """Load the file -> topic URL mapping, creating an empty file if absent."""
    if not os.path.exists(mapping_path):
        _save_mapping(mapping_path, {})
    with open(mapping_path, 'r', encoding='utf-8') as f:
        return json.load(f)


def send_topic(web_url, book_dir, mapping_path):
    """Post one community topic per file under ``book_dir``.

    For every file found by ``get_all_files(book_dir)`` a topic is sent to
    the internal ``sendTopic`` endpoint. The topic URL returned by the
    service is stored in a JSON mapping (file path -> topic URL) at
    ``mapping_path``; the mapping is rewritten after each newly created
    topic. Files already present in the mapping are re-sent with the
    ``id`` parsed from the stored URL (presumably making the service update
    the existing topic -- confirm against the API).

    Args:
        web_url: Prefix prepended to each file path to form the code link,
            e.g. "https://codechina.csdn.net/csdn/<repo>/-/blob/master/".
        book_dir: Directory whose files are published.
        mapping_path: Path of the JSON mapping file; created if missing.

    Returns:
        None. Progress is printed to stdout.
    """
    request_url = 'http://ccloud.internal.csdn.net/v1/internal/community/content/sendTopic'
    files = get_all_files(book_dir)
    print(files)

    chapter_code_mapping = _load_mapping(mapping_path)

    for file in files:
        # Strip only the leading book_dir; str.replace would also remove
        # later occurrences of the same text inside the path.
        if file.startswith(book_dir):
            topic_title = file[len(book_dir):]
        else:
            topic_title = file
        topic_title = topic_title.replace('/', '|').replace(' ', '.')

        # NOTE(review): the original passed topic_title as a second,
        # unused format argument; the template may once have contained a
        # second placeholder. The emitted text is unchanged here.
        topic_content = "代码:{}".format(web_url + file)
        print(topic_title)

        send_topic_request_param = {
            "type": "long_text",
            "cateId": 20967,
            "content": topic_content,
            "topicTitle": topic_title,
            "mdContent": topic_content,
            "communityId": 3823,
            "loginUserName": "BBS_Assistant",
            "bizNo": "ebook"
        }

        existing_link = chapter_code_mapping.get(file)
        if existing_link is None:
            resp = post(request_url, send_topic_request_param)
            try:
                topic_link = resp['data']['content']['url']
            except (TypeError, KeyError):
                # post() may yield None or an error payload; skip this file
                # so it is retried on the next run instead of crashing.
                logger.error("send topic for %s failed, resp: %s", file, resp)
                continue
            chapter_code_mapping[file] = topic_link
            print('{}:{}'.format(file, topic_link))
            # Persist after every new topic so an interrupted run can resume.
            _save_mapping(mapping_path, chapter_code_mapping)
        else:
            # The topic id is the last path segment of the stored URL.
            send_topic_request_param['id'] = int(existing_link.split('/')[-1])
            post(request_url, send_topic_request_param)
            print('{}:{}'.format(file, existing_link))