import os
import json
import html
import requests
import logging
# Module-level logger named after this module; post() writes its request/response diagnostics here.
logger = logging.getLogger(__name__)
def _list_subdirs(parent):
    """Return the paths of the immediate sub-directories of ``parent``."""
    candidates = (os.path.join(parent, entry) for entry in os.listdir(parent))
    return [path for path in candidates if os.path.isdir(path)]


def modify_dir_name():
    """Rename every directory located three levels below ``data``.

    A directory named ``"<a>.<b> <title>"`` is renamed to ``"<b>.<title>"``:
    the text before the first space is split on ``.`` and its second
    component is kept, followed by the word right after the first space.
    Each source path and its new path are printed before the rename.

    Directories whose name does not have that shape, or whose rename is
    rejected by the operating system, are left untouched and their path is
    printed instead of aborting the whole run.
    """
    data_dir = 'data'
    # Collect level by level so every listing is finished before any
    # rename happens.
    book_dirs = _list_subdirs(data_dir)
    chapter_dirs = [
        chapter for book in book_dirs for chapter in _list_subdirs(book)
    ]
    exercise_dirs = [
        exercise
        for chapter in chapter_dirs
        for exercise in _list_subdirs(chapter)
    ]

    for dst_dir in exercise_dirs:
        # os.path.split honours the platform separator, unlike a
        # hard-coded split on '/'.
        root_dir, dir_name = os.path.split(dst_dir)
        try:
            words = dir_name.split(' ')
            res_dir_name = words[1]
            number = words[0].split('.')[1] + '.'
            res_dir_name = number + res_dir_name
            print(dst_dir)
            final_name = os.path.join(root_dir, res_dir_name)
            print(final_name)
            os.rename(dst_dir, final_name)
        except (IndexError, OSError):
            # IndexError: the name lacks the "<a>.<b> <title>" shape.
            # OSError: the rename itself failed.
            # Anything else (e.g. KeyboardInterrupt) must propagate.
            print(dst_dir)
def get_files_path(file_dir, filetype='.txt'):
    """Collect the paths of the files found under a directory tree.

    Args:
        file_dir: Directory that is walked recursively.
        filetype: File extension to keep, including the leading dot.
            ``None`` keeps every file.

    Returns:
        A list of paths, each formed by joining the directory being walked
        with the file name (so they are relative whenever ``file_dir`` is).
    """
    def wanted(name):
        # Compare only the final extension, e.g. ".txt".
        return filetype is None or os.path.splitext(name)[1] == filetype

    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(file_dir)
        for name in names
        if wanted(name)
    ]
def post(url, params, retry=3, headers=None):
    """POST ``params`` to ``url`` and return the decoded JSON response.

    Args:
        url: Target URL.
        params: Request payload. It is serialized with ``json.dumps`` when
            ``headers`` is ``None``; otherwise it is sent as given.
        retry: Maximum number of attempts.
        headers: Optional request headers. When ``None`` a JSON
            ``Content-Type`` header is used.

    Returns:
        The parsed JSON body of the first truthy response, or ``None`` when
        no attempt is made (``retry <= 0``) or when the final attempt
        yields a falsy response.

    Raises:
        Exception: Whatever the final attempt raised (request failure or
            undecodable body) once all ``retry`` attempts are used up.
    """
    # The payload and headers do not change between attempts, so build
    # them once.
    if headers is None:
        hdrs = {"Content-Type": "application/json"}
        data = json.dumps(params)
    else:
        hdrs = headers
        data = params

    fails = 0
    while fails < retry:
        try:
            logger.debug("will post %s to %s", data, url)
            resp = requests.post(url, data, headers=hdrs, timeout=10)
            if resp:
                logger.info("resp %s", resp.content)
                return resp.json()
            logger.error("resp: [%s]", resp)
            fails += 1
        except Exception as error:
            logger.error("post %s to %s failed %s", params, url, error)
            fails += 1
            # ">=" rather than ">": the loop never lets ``fails`` exceed
            # ``retry``, so the old check could not fire and the last
            # error was dropped silently.
            if fails >= retry:
                raise
    return None
def send_topic():
    """Re-send the community topic of every ``.py`` file of the book.

    Loads the file-path -> topic-link mapping from ``mapping_path``. For
    each ``.py`` file under the book directory that has a mapping entry, it
    posts a topic whose ``id`` is the last ``/``-separated component of the
    mapped link. Files without an entry only print an error marker; the
    mapping itself is not modified here. Finally the mapping is printed
    and written back to ``mapping_path``.
    """
    web_url = "https://codechina.csdn.net/csdn/book_code_c798a5992a654857867ec15660e1c32a/-/blob/master/"
    request_url = 'http://ccloud.internal.csdn.net/v1/internal/community/content/sendTopic'
    files = get_files_path('data/books/1.Python编程无师自通', '.py')
    mapping_path = 'data/topic_mapping/1.Python编程无师自通.json'

    # Explicit UTF-8: the mapping is written with ensure_ascii=False below,
    # so relying on the platform default encoding could corrupt or reject
    # it.
    with open(mapping_path, 'r', encoding='utf-8') as f:
        chapter_code_mapping = json.load(f)

    for file in files:
        # NOTE(review): this prefix lacks the "1." of the directory walked
        # above, so the replace presumably never matches and the title
        # keeps the full path - confirm the intended title before changing.
        topic_title = file.replace('data/books/Python编程无师自通/', '')
        topic_title = topic_title.replace('/', '|')
        topic_title = topic_title.replace(' ', '.')
        topic_content = "代码:{}".format(web_url + file)
        send_topic_request_param = {
            "type": "long_text",
            "cateId": 20890,
            "content": topic_content,
            "topicTitle": topic_title,
            "mdContent": topic_content,
            "communityId": 3597,
            "loginUserName": "community_286",
            "bizNo": "ebook",
        }

        topic_link = chapter_code_mapping.get(file)
        if topic_link is None:
            # No topic is created for unmapped files; only report it.
            print('错误')
            continue

        # The topic id is the trailing component of the stored link.
        send_topic_request_param['id'] = int(topic_link.split('/')[-1])
        post(request_url, send_topic_request_param)
        print('{}:{}'.format(file, topic_link))

    save_mapping = json.dumps(chapter_code_mapping, ensure_ascii=False, indent=2)
    print(save_mapping)
    with open(mapping_path, 'w', encoding='utf-8') as f:
        f.write(save_mapping)
# Script entry point: runs the topic update as soon as this module executes.
# NOTE(review): this call is unguarded, so importing the module triggers it too.
send_topic()