import os
import re
import shutil
import json
import uuid

# Default locations used by the helpers below. Every helper accepts an
# override so it can be pointed at a different checkout.
_DEFAULT_DATA_DIR = 'data/3.算法高阶/1.leetcode'
_DEFAULT_DAILYCODE_EXERCISES_DIR = '/Users/zhangzc/Desktop/workplace/daily-code-data/data/input/dailycode/leetcode/exercises'
_DEFAULT_CRAWLER_LEETCODE_DIR = '/Users/zhangzc/Desktop/workplace/LeetCodeCN-Problem-Crawler/leetcode_html'
_DEFAULT_TAGS_DIR = '/Users/zhangzc/Desktop/workplace/skill_tree_pipeline/data/input/dailycode/leetcode/exercises'

# Captures the text between the first "# <title>" line and the "## aop"
# heading of a solution.md file.
_DESC_SECTION_RE = re.compile('# .*?\n(.*?)\n## aop', re.DOTALL)


def get_files_path(file_dir, filetype='.txt'):
    """Collect the paths of all files with a given extension under a directory.

    Args:
        file_dir: Directory that is walked recursively.
        filetype: File extension to keep, including the leading dot
            (e.g. ``'.txt'``). ``None`` keeps every file.

    Returns:
        A list of paths, each built by joining the walked directory with the
        file name (so they are absolute only if ``file_dir`` is absolute).
    """
    return [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(file_dir)
        for name in names
        if filetype is None or os.path.splitext(name)[1] == filetype
    ]


def _iter_exercise_dirs(data_dir):
    """Yield ``(dir_path, exercises_id, number, title)`` per exercise directory.

    Directory names are expected to look like ``<id>_<title>``. ``exercises_id``
    is the raw text before the first underscore, ``number`` is its integer
    value and ``title`` is the second underscore-separated field (``None``
    when the name contains no underscore). Plain files and directories whose
    prefix is not an integer are skipped.
    """
    for name in os.listdir(data_dir):
        dir_path = os.path.join(data_dir, name)
        if not os.path.isdir(dir_path):
            continue
        fields = name.split('_')
        exercises_id = fields[0]
        try:
            number = int(exercises_id)
        except ValueError:
            # Not an exercise directory (e.g. a notes or hidden folder).
            continue
        title = fields[1] if len(fields) > 1 else None
        yield dir_path, exercises_id, number, title


def leetcode_helper(data_dir=_DEFAULT_DATA_DIR,
                    dailycode_exercises_dir=_DEFAULT_DAILYCODE_EXERCISES_DIR,
                    crawer_leetcode_dir=_DEFAULT_CRAWLER_LEETCODE_DIR):
    """Populate each exercise directory with ``desc.html`` and ``solution.cpp``.

    For ids in ``[0, 100)`` both files are copied from
    ``dailycode_exercises_dir/<id>/``. For every other id an empty
    ``solution.cpp`` is created when missing, and ``desc.html`` is copied from
    ``crawer_leetcode_dir/<id + 1>.html`` when that file exists. For ids in
    ``[100, 203)`` the path of an empty ``solution.cpp`` is printed if the
    directory has neither ``solution.sh`` nor ``solution.sql``.

    Args:
        data_dir: Directory containing the ``<id>_<title>`` exercise folders.
        dailycode_exercises_dir: Source tree for the first 100 exercises.
        crawer_leetcode_dir: Directory holding crawled ``<n>.html`` files.

    Raises:
        OSError: If ``data_dir`` cannot be listed or a required source file
            for ids in ``[0, 100)`` is missing.
    """
    for dir_path, exercises_id, number, _title in _iter_exercise_dirs(data_dir):
        desc_dst_path = os.path.join(dir_path, 'desc.html')
        cpp_code_dst_path = os.path.join(dir_path, 'solution.cpp')

        if 0 <= number < 100:
            src_dir = os.path.join(dailycode_exercises_dir, exercises_id)
            shutil.copy(os.path.join(src_dir, '{}_desc.html'.format(exercises_id)), desc_dst_path)
            shutil.copy(os.path.join(src_dir, '{}.cpp'.format(exercises_id)), cpp_code_dst_path)
            continue

        shell_code_dst_path = os.path.join(dir_path, 'solution.sh')
        sql_code_dst_path = os.path.join(dir_path, 'solution.sql')

        if not os.path.exists(cpp_code_dst_path):
            # Create an empty placeholder; the context manager closes the handle.
            with open(cpp_code_dst_path, 'w', encoding='utf-8'):
                pass

        if 100 <= number < 203:
            with open(cpp_code_dst_path, 'r', encoding='utf-8') as f:
                cpp_code = f.read()
            if (cpp_code == ''
                    and not os.path.exists(shell_code_dst_path)
                    and not os.path.exists(sql_code_dst_path)):
                # Report exercises that still have no solution of any kind.
                print(cpp_code_dst_path)

        # NOTE(review): the original indentation was lost; this copy is assumed
        # to apply to every id outside [0, 100) - confirm.
        # The crawled files are looked up under id + 1.
        desc_src_path = os.path.join(crawer_leetcode_dir, str(number + 1) + '.html')
        if os.path.exists(desc_src_path):
            shutil.copy(desc_src_path, desc_dst_path)


def leetcode_helper_delete_md(data_dir=_DEFAULT_DATA_DIR,
                              template_path='leetcode_template.md'):
    """Delete ``solution.md`` files that are still the untouched template.

    Only ids in ``[0, 100)`` are considered. The template's ``# 两数之和``
    heading is replaced with the exercise title before comparing, and the file
    is removed only on an exact match.

    Args:
        data_dir: Directory containing the ``<id>_<title>`` exercise folders.
        template_path: Path of the markdown template to compare against.
    """
    raw_template = None
    for dir_path, _exercises_id, number, title in _iter_exercise_dirs(data_dir):
        if title is None or not (0 <= number < 100):
            continue
        solution_md_path = os.path.join(dir_path, 'solution.md')
        if not os.path.exists(solution_md_path):
            # Already removed (e.g. by a previous run).
            continue
        if raw_template is None:
            # Read once, and only when there is something to compare.
            with open(template_path, 'r', encoding='utf-8') as f:
                raw_template = f.read()
        template = raw_template.replace('# 两数之和', '# {}'.format(title))
        with open(solution_md_path, 'r', encoding='utf-8') as f:
            leetcode_solution_md_data = f.read()
        if leetcode_solution_md_data == template:
            os.remove(solution_md_path)


def leetcode_helper_update_md(data_dir=_DEFAULT_DATA_DIR):
    """Replace the description section of each ``solution.md`` with ``desc.html``.

    For ids in ``[0, 100 * 5)`` the text between the first ``# ...`` heading
    line and the ``## aop`` heading is replaced by the contents of
    ``desc.html`` followed by the question prompt. Directories lacking either
    file, or whose ``solution.md`` has no such section, are left untouched.

    Args:
        data_dir: Directory containing the ``<id>_<title>`` exercise folders.
    """
    for dir_path, _exercises_id, number, _title in _iter_exercise_dirs(data_dir):
        if not (0 <= number < 500):
            continue
        solution_md_path = os.path.join(dir_path, 'solution.md')
        desc_html_path = os.path.join(dir_path, 'desc.html')
        if not os.path.exists(desc_html_path) or not os.path.exists(solution_md_path):
            continue

        with open(solution_md_path, 'r', encoding='utf-8') as f:
            solution_md_data = f.read()
        with open(desc_html_path, 'r', encoding='utf-8') as f:
            desc_html_data = f.read()

        match = _DESC_SECTION_RE.search(solution_md_data)
        if match is None:
            continue

        # NOTE(review): separator reproduced as it appears in the source - confirm.
        new_content = desc_html_data + "\n\n\n{}\n\n".format("以下错误的选项是?")

        # Splice by position instead of str.replace(): replace() would rewrite
        # every occurrence of the old section and, when the section is empty,
        # insert the new text between every character of the file.
        solution_md_data = (solution_md_data[:match.start(1)]
                            + new_content
                            + solution_md_data[match.end(1):])
        with open(solution_md_path, 'w', encoding='utf-8') as f:
            f.write(solution_md_data)


def leetcode_helper_update_config(data_dir=_DEFAULT_DATA_DIR):
    """Refresh ``config.json`` and regenerate ``solution.json`` per exercise.

    For ids in ``[0, 500)`` that have a ``solution.md``, ``config.json`` gets
    its ``export``, ``title`` and ``keywords`` entries overwritten and
    ``solution.json`` is rewritten with a freshly generated ``exercise_id``.

    Args:
        data_dir: Directory containing the ``<id>_<title>`` exercise folders.

    Raises:
        OSError: If ``config.json`` is missing for a processed exercise.
    """
    for dir_path, _exercises_id, number, title in _iter_exercise_dirs(data_dir):
        if title is None or not (0 <= number < 500):
            continue
        # NOTE(review): the original indentation was lost; everything below is
        # assumed to be conditional on solution.md existing - confirm.
        if not os.path.exists(os.path.join(dir_path, 'solution.md')):
            continue

        config_json_path = os.path.join(dir_path, 'config.json')
        solution_json_path = os.path.join(dir_path, 'solution.json')

        with open(config_json_path, 'r', encoding='utf-8') as f:
            config_data = json.load(f)
        config_data['export'] = ['solution.json']
        config_data['title'] = title
        config_data['keywords'] = ['leetcode', title]
        with open(config_json_path, 'w', encoding='utf-8') as f:
            f.write(json.dumps(config_data, ensure_ascii=False, indent=4))

        # A new random id is generated on every run, as in the original.
        solution_json_data = {
            "type": "code_options",
            "author": "CSDN.net",
            "source": "solution.md",
            "exercise_id": uuid.uuid4().hex,
        }
        with open(solution_json_path, 'w', encoding='utf-8') as f:
            f.write(json.dumps(solution_json_data, ensure_ascii=False, indent=3))


def count_tag_class(data_dir=_DEFAULT_TAGS_DIR):
    """Print and return the set of distinct tags found in the ``.json`` files.

    Each JSON file under ``data_dir`` must hold an object whose ``tags`` entry
    is a comma-separated string.

    Args:
        data_dir: Directory searched recursively for ``.json`` files.

    Returns:
        The set of tag strings (also printed).
    """
    tags_set = set()
    for file in get_files_path(data_dir, '.json'):
        # Explicit encoding so non-ASCII tags decode the same on every platform.
        with open(file, 'r', encoding='utf-8') as f:
            data = json.load(f)
        tags_set.update(data['tags'].split(','))
    print(tags_set)
    return tags_set


if __name__ == '__main__':
    leetcode_helper_update_md()
    leetcode_helper_update_config()