import argparse
import collections
import json
import os
import re
import shutil
import uuid

parser = argparse.ArgumentParser()
parser.add_argument("--run", type=str, help="Decide to run which function")


def _list_subdirs(parent):
    """Return the joined paths of the immediate sub-directories of *parent*.

    Entries are returned in ``os.listdir`` order; plain files are skipped.
    """
    candidates = (os.path.join(parent, name) for name in os.listdir(parent))
    return [path for path in candidates if os.path.isdir(path)]


def _write_solution_json(solution_json_path, indent):
    """Write a fresh ``solution.json`` carrying a newly generated exercise id."""
    solution_json_data = {
        "type": "code_options",
        "author": "CSDN.net",
        "source": "solution.md",
        "exercise_id": uuid.uuid4().hex,
    }
    solution_json = json.dumps(solution_json_data, ensure_ascii=False, indent=indent)
    with open(solution_json_path, 'w', encoding='utf-8') as f:
        f.write(solution_json)


def get_files_path(file_dir, filetype='.txt'):
    """Collect the paths of all files with a given suffix under a directory tree.

    Args:
        file_dir: Directory that is walked recursively.
        filetype: File extension to keep, including the leading dot.
            ``None`` keeps every file.

    Returns:
        A list of paths, each built by joining the walked directory with the
        file name (so they are absolute only when ``file_dir`` is absolute).
    """
    files_path = []
    for root, _dirs, names in os.walk(file_dir):
        files_path.extend(
            os.path.join(root, name)
            for name in names
            if filetype is None or os.path.splitext(name)[1] == filetype
        )
    return files_path


def leetcode_helper():
    """Populate each exercise directory with ``desc.html`` / ``solution.cpp``.

    Exercise directories are named ``<id>_<title>``. For ids 0-99 the
    description and C++ solution are copied from the dailycode export. For
    every other id an empty ``solution.cpp`` is created when missing, and the
    description is copied from the crawler output when that file exists (the
    crawler file name is the id plus one). For ids 100-202 the path of an
    empty ``solution.cpp`` is printed when no shell/SQL solution exists.
    """
    data_dir = 'data/3.算法高阶/1.leetcode'
    dailycode_exercises_dir = '/Users/zhangzc/Desktop/workplace/daily-code-data/data/input/dailycode/leetcode/exercises'
    crawer_leetcode_dir = '/Users/zhangzc/Desktop/workplace/LeetCodeCN-Problem-Crawler/leetcode_html'

    for exercise_dir in _list_subdirs(data_dir):
        exercises_id = os.path.basename(exercise_dir).split('_')[0]
        exercise_no = int(exercises_id)
        desc_dst_path = os.path.join(exercise_dir, 'desc.html')
        cpp_code_dst_path = os.path.join(exercise_dir, 'solution.cpp')

        if 0 <= exercise_no < 100:
            src_dir = os.path.join(dailycode_exercises_dir, exercises_id)
            desc_src_path = os.path.join(src_dir, '{}_desc.html'.format(exercises_id))
            cpp_code_src_path = os.path.join(src_dir, '{}.cpp'.format(exercises_id))
            shutil.copy(desc_src_path, desc_dst_path)
            shutil.copy(cpp_code_src_path, cpp_code_dst_path)
            continue

        shell_code_dst_path = os.path.join(exercise_dir, 'solution.sh')
        sql_code_dst_path = os.path.join(exercise_dir, 'solution.sql')

        if not os.path.exists(cpp_code_dst_path):
            # Create an empty placeholder; the context manager closes the
            # handle immediately instead of leaking it.
            with open(cpp_code_dst_path, 'w', encoding='utf-8'):
                pass

        if 100 <= exercise_no < 203:
            with open(cpp_code_dst_path, 'r', encoding='utf-8') as f:
                cpp_code = f.read()
            if (cpp_code == ''
                    and not os.path.exists(shell_code_dst_path)
                    and not os.path.exists(sql_code_dst_path)):
                # Report exercises that still have no solution of any kind.
                print(cpp_code_dst_path)

        # NOTE(review): indentation was lost in the reviewed source; the
        # description copy is assumed to apply to every id outside 0-99, not
        # only to 100-202 -- confirm.
        desc_src_path = os.path.join(crawer_leetcode_dir, str(exercise_no + 1) + '.html')
        if os.path.exists(desc_src_path):
            shutil.copy(desc_src_path, desc_dst_path)


def leetcode_helper_delete_md():
    """Delete ``solution.md`` files that are still the untouched template.

    For exercise ids 0-99, ``leetcode_template.md`` (read from the current
    working directory) has its ``# 两数之和`` heading replaced by the
    exercise title; a ``solution.md`` that equals the result is removed.
    """
    data_dir = 'data/3.算法高阶/1.leetcode'
    template_text = None

    for exercise_dir in _list_subdirs(data_dir):
        name_parts = os.path.basename(exercise_dir).split('_')
        exercises_id = name_parts[0]
        title = name_parts[1]
        if not 0 <= int(exercises_id) < 100:
            continue

        if template_text is None:
            # Read the template once, and only when it is actually needed.
            with open('leetcode_template.md', 'r', encoding='utf-8') as f:
                template_text = f.read()
        template = template_text.replace('# 两数之和', '# {}'.format(title))

        solution_md_path = os.path.join(exercise_dir, 'solution.md')
        with open(solution_md_path, 'r', encoding='utf-8') as f:
            leetcode_solution_md_data = f.read()
        if leetcode_solution_md_data == template:
            os.remove(solution_md_path)


def leetcode_helper_update_md():
    """Replace the description section of each ``solution.md`` with ``desc.html``.

    For exercise ids 0-499 that have both ``desc.html`` and ``solution.md``,
    the text between the first heading line and the ``## aop`` marker is
    replaced by the HTML description followed by the question prompt.
    Files without a ``## aop`` marker are reported and left untouched.
    """
    data_dir = 'data/3.算法高阶/1.leetcode'
    section_pattern = re.compile('# .*?\n(.*?)\n## aop', re.DOTALL)

    for exercise_dir in _list_subdirs(data_dir):
        exercises_id = os.path.basename(exercise_dir).split('_')[0]
        if not 0 <= int(exercises_id) < 500:
            continue

        solution_md_path = os.path.join(exercise_dir, 'solution.md')
        desc_html_path = os.path.join(exercise_dir, 'desc.html')
        if not (os.path.exists(desc_html_path) and os.path.exists(solution_md_path)):
            continue

        with open(solution_md_path, 'r', encoding='utf-8') as f:
            solution_md_data = f.read()
        with open(desc_html_path, 'r', encoding='utf-8') as f:
            desc_html_data = f.read()

        match = section_pattern.search(solution_md_data)
        if match is None:
            print('no "## aop" section found, skipped: {}'.format(solution_md_path))
            continue

        # NOTE(review): in the reviewed source this literal was split across
        # physical lines; it is reproduced here with explicit newline
        # escapes -- confirm against the intended markup.
        new_content = desc_html_data + "\n\n{}\n".format("以下错误的选项是?")

        # Splice by match position rather than str.replace(): replace() would
        # touch every duplicate of the old section text and, when the section
        # is empty, insert the new text between every character of the file.
        solution_md_data = (
            solution_md_data[:match.start(1)]
            + new_content
            + solution_md_data[match.end(1):]
        )
        with open(solution_md_path, 'w', encoding='utf-8') as f:
            f.write(solution_md_data)


def leetcode_helper_update_config():
    """Refresh ``config.json`` and regenerate ``solution.json`` per exercise.

    For exercise ids 0-499 that have a ``solution.md``, the existing
    ``config.json`` gets its ``export``, ``title`` and ``keywords`` fields
    overwritten, and ``solution.json`` is rewritten with a new random
    exercise id (any previous id is discarded).
    """
    data_dir = 'data/3.算法高阶/1.leetcode'

    for exercise_dir in _list_subdirs(data_dir):
        name_parts = os.path.basename(exercise_dir).split('_')
        exercises_id = name_parts[0]
        title = name_parts[1]
        if not 0 <= int(exercises_id) < 500:
            continue

        solution_md_path = os.path.join(exercise_dir, 'solution.md')
        if not os.path.exists(solution_md_path):
            continue

        config_json_path = os.path.join(exercise_dir, 'config.json')
        with open(config_json_path, 'r', encoding='utf-8') as f:
            config_data = json.load(f)
        config_data['export'] = ['solution.json']
        config_data['title'] = title
        config_data['keywords'] = ['leetcode', title]
        config_data_json = json.dumps(config_data, ensure_ascii=False, indent=4)
        with open(config_json_path, 'w', encoding='utf-8') as f:
            f.write(config_data_json)

        _write_solution_json(os.path.join(exercise_dir, 'solution.json'), indent=3)


def count_tag_class():
    """Print the set of distinct tags found in the exercise JSON files.

    Each ``.json`` file under the hard-coded exercises directory is expected
    to hold a ``tags`` entry that is a comma-separated string.
    """
    data_dir = '/Users/zhangzc/Desktop/workplace/skill_tree_pipeline/data/input/dailycode/leetcode/exercises'
    tags_set = set()
    for file in get_files_path(data_dir, '.json'):
        # Explicit encoding, consistent with every other file access here.
        with open(file, 'r', encoding='utf-8') as f:
            data = json.load(f)
        tags_set.update(data['tags'].split(','))
    print(tags_set)


def count_exercises():
    """Report duplicated and missing exercise ids across the skill-tree levels.

    Exercise directories (``<id>_<title>``) are looked up two levels below
    each level directory. When an id occurs more than once the full id
    ``Counter`` is printed; afterwards the ids in ``range(200)`` that were
    not found are printed.
    """
    level_dirs = ['data/2.算法中阶', 'data/3.算法高阶']
    exercises_ids = []
    for level_dir in level_dirs:
        for category_dir in _list_subdirs(level_dir):
            for exercises_dir in _list_subdirs(category_dir):
                exercises_id = int(os.path.basename(exercises_dir).split('_')[0])
                exercises_ids.append(exercises_id)

    # Explicit check instead of `try: assert ... except:` so the report also
    # works under `python -O` and no unrelated exception is swallowed.
    if len(set(exercises_ids)) != len(exercises_ids):
        print(collections.Counter(exercises_ids))
    print('------分割线-------')
    lacked_id = set(range(200)) - set(exercises_ids)
    print(lacked_id)


def modify_config_and_dir_name():
    """Write a brand-new ``config.json`` into every backup exercise directory.

    Directories are named ``<id>.<title>``; the title is the text after the
    last dot. Any existing ``config.json`` is overwritten and receives a new
    random ``node_id``.
    """
    backup_dir = 'data_backup/1.leetcode'
    for exercise_dir in _list_subdirs(backup_dir):
        print(exercise_dir)
        title = os.path.basename(exercise_dir).split('.')[-1]
        config_path = os.path.join(exercise_dir, 'config.json')
        config_data = {
            'node_id': "algorithm-" + uuid.uuid4().hex,
            'keywords': ["leetcode", title],
            'children': [],
            'export': ['solution.json'],
            'title': title,
        }
        print(title)
        config_data_json = json.dumps(config_data, ensure_ascii=False, indent=2)
        with open(config_path, 'w', encoding='utf-8') as f:
            f.write(config_data_json)


def modify_config_and_dir_name_new():
    """Normalise blank lines after the headings of every ``solution.md``.

    Walks ``<level>/<category>/<exercise>/solution.md`` for the three level
    directories. When a heading (title, ``## aop``, ``## 答案``, ``## 选项``,
    ``### before``, ``### after``) is never followed by an empty line, a
    newline is appended to every occurrence of it; likewise a newline is put
    in front of `` ```cpp `` when no fence is preceded by an empty line. The
    file is always written back.
    """
    level_dirs = ['data/2.算法中阶', 'data/3.算法高阶', 'data/1.算法初阶']
    section_markers = ('## aop', '## 答案', '## 选项', '### before', '### after')

    for level_dir in level_dirs:
        # Directory names use "<id>.<title>" in the first level and
        # "<id>-<title>" in the other two.
        title_separator = '.' if level_dir == "data/1.算法初阶" else '-'
        for category_dir in _list_subdirs(level_dir):
            for tem_dir in _list_subdirs(category_dir):
                solution_md_path = os.path.join(tem_dir, 'solution.md')
                title = os.path.basename(tem_dir).split(title_separator)[-1]

                with open(solution_md_path, 'r', encoding='utf-8') as f:
                    solution_md_data = f.read()

                if '# {}\n\n'.format(title) not in solution_md_data:
                    solution_md_data = solution_md_data.replace(
                        '# {}'.format(title), '# {}\n'.format(title))
                    # NOTE(review): indentation was lost in the reviewed
                    # source; the print is assumed to report only the files
                    # whose title heading was adjusted -- confirm.
                    print(tem_dir)

                for marker in section_markers:
                    if marker + '\n\n' not in solution_md_data:
                        solution_md_data = solution_md_data.replace(marker, marker + '\n')

                if '\n\n```cpp' not in solution_md_data:
                    solution_md_data = solution_md_data.replace('```cpp', '\n```cpp')

                with open(solution_md_path, 'w', encoding='utf-8') as f:
                    f.write(solution_md_data)


def modify_back_up_dir_name():
    """Rename backup directories from ``<id>.<title>`` to ``<id + 1>-<title>``.

    The id is the text before the first dot and must be an integer; the
    title is the text after the last dot. Running this a second time on
    already renamed directories raises ``ValueError`` because the leading
    part is no longer a plain integer.
    """
    backup_dir = 'data_backup/1.leetcode'
    for exercise_dir in _list_subdirs(backup_dir):
        print(exercise_dir)
        name_parts = os.path.basename(exercise_dir).split('.')
        exercises_id = name_parts[0]
        title = name_parts[-1]
        new_dir_name = os.path.join(
            os.path.dirname(exercise_dir),
            '{}-{}'.format(int(exercises_id) + 1, title),
        )
        os.rename(exercise_dir, new_dir_name)
        print(new_dir_name)


def leetcode_helper_add_sloutionjson():
    """Create ``solution.json`` in every backup exercise directory lacking one.

    The path of each ``solution.json`` is printed whether or not it already
    exists; existing files are left untouched.
    """
    data_dir = 'data_backup/1.leetcode'
    for exercise_dir in _list_subdirs(data_dir):
        solution_json_path = os.path.join(exercise_dir, 'solution.json')
        print(solution_json_path)
        if not os.path.exists(solution_json_path):
            _write_solution_json(solution_json_path, indent=2)


def main(argv=None):
    """Parse ``--run`` and invoke the selected helper.

    Args:
        argv: Argument list to parse; ``None`` uses ``sys.argv[1:]``.
    """
    args = parser.parse_args(argv)
    helper_function = args.run

    if helper_function == 'count_tag_class':
        count_tag_class()

    if helper_function == 'count_exercises':
        count_exercises()

    if helper_function == 'modify_back_up_dir_name':
        modify_back_up_dir_name()

    # NOTE(review): indentation was lost in the reviewed source; this call is
    # treated as unconditional (it runs on every invocation, regardless of
    # --run) -- confirm that this is intended.
    modify_config_and_dir_name_new()


if __name__ == "__main__":
    main()