import os
import re
import sys
import uuid
import json
import shutil

# Output root directory for each difficulty label used in the source JSON.
_DIFFICULTY_DIRS = {
    '简单': 'data/1.dailycode初阶',
    '中等': 'data/2.dailycode中阶',
    '困难': 'data/3.dailycode高阶',
}

# Numbered sub-directory for each supported language.
_LANGUAGE_DIRS = {
    'cpp': '1.cpp',
    'java': '2.java',
    'python': '3.python',
}


def get_files_path(file_dir, filetype='.txt'):
    """Recursively collect the files under ``file_dir`` with a given extension.

    Args:
        file_dir: Directory to walk.
        filetype: Extension to keep, including the dot (e.g. ``'.json'``).
            ``None`` keeps every file.

    Returns:
        A list of paths built as ``os.path.join(root, name)``. The paths are
        relative when ``file_dir`` is relative. The list is empty when
        ``file_dir`` does not exist, because ``os.walk`` then yields nothing.
    """
    return [
        os.path.join(root, name)
        for root, _, names in os.walk(file_dir)
        for name in names
        if filetype is None or os.path.splitext(name)[1] == filetype
    ]


def load_json(path):
    """Read the file at ``path`` and return its content parsed as JSON.

    Args:
        path: Path of the JSON file.

    Returns:
        The parsed JSON value.
    """
    # Explicit UTF-8: the data contains Chinese text and must not depend on
    # the platform's locale encoding.
    with open(path, encoding='utf-8') as f:
        return json.load(f)


def dump_json(path, data):
    """Serialize ``data`` as indented JSON and write it to ``path``.

    Non-ASCII characters are written verbatim (``ensure_ascii=False``), so the
    file is always encoded as UTF-8.

    Args:
        path: Destination file path; an existing file is overwritten.
        data: JSON-serializable object.

    Returns:
        None
    """
    with open(path, 'w', encoding='utf-8') as df:
        df.write(json.dumps(data, indent=2, ensure_ascii=False))


def _new_config():
    """Build the content of a fresh ``config.json`` with a unique node id."""
    return {
        'node_id': 'dailycode-' + uuid.uuid4().hex,
        'keywords': [],
        'children': [],
        'export': ["solution.json"],
    }


def _new_solution_meta(author, keywords):
    """Build the content of a fresh ``solution.json`` with a unique exercise id."""
    return {
        'type': 'code_options',
        'author': author,
        'source': 'solution.md',
        'exercise_id': uuid.uuid4().hex,
        'keywords': keywords,
    }


def _fenced(language, body=''):
    """Wrap ``body`` in a Markdown code fence tagged with ``language``."""
    return f"```{language}\n{body}\n```"


def _render_template_md(title, content, language, answer):
    """Render ``solution.md`` with the answer as template and empty options."""
    empty = _fenced(language)
    return (
        f"# {title}\n\n{content}\n\n"
        f"## template\n\n{_fenced(language, answer)}\n\n"
        f"## 答案\n\n{empty}\n\n"
        f"## 选项\n\n"
        f"### A\n\n{empty}\n\n"
        f"### B\n\n{empty}\n\n"
        f"### C\n\n{empty}"
    )


def _render_choice_md(title, content, language, answer, distractors):
    """Render ``solution.md`` for a multiple-choice item with three wrong options."""
    option_a, option_b, option_c = distractors[:3]
    return (
        f"# {title}\n\n{content}\n\n"
        f"## 答案\n\n{_fenced(language, answer)}\n\n"
        f"## 选项\n\n"
        f"### A\n\n{_fenced(language, option_a)}\n\n"
        f"### B\n\n{_fenced(language, option_b)}\n\n"
        f"### C\n\n{_fenced(language, option_c)}"
    )


def _language_root(difficulty, language):
    """Return ``<difficulty root>/<language dir>``; exit on an unknown value."""
    # The isinstance check keeps unhashable JSON values (list/dict) on the
    # "unknown difficulty" path instead of raising TypeError in the lookup.
    root_dir = _DIFFICULTY_DIRS.get(difficulty) if isinstance(difficulty, str) else None
    if root_dir is None:
        sys.exit("难度等级异常")
    language_dir = _LANGUAGE_DIRS.get(language)
    if language_dir is None:
        sys.exit("语言类型异常")
    return os.path.join(root_dir, language_dir)


def _write_exercise(exercise_dir, solution_json_data, config_data, solution_md_data):
    """Create ``exercise_dir`` if needed and write the three exercise files."""
    os.makedirs(exercise_dir, exist_ok=True)
    dump_json(os.path.join(exercise_dir, 'solution.json'), solution_json_data)
    dump_json(os.path.join(exercise_dir, 'config.json'), config_data)
    with open(os.path.join(exercise_dir, 'solution.md'), 'w', encoding='utf-8') as f:
        f.write(solution_md_data)


def classify_exercises():
    """Sort the per-language exercise JSON files into the ``data/`` tree.

    For each of ``cpp``, ``java`` and ``python`` every ``.json`` file under
    ``data_backup/<language>_code_json`` is read. Records whose ``status`` is
    ``0`` are skipped. Each remaining record is written to
    ``<difficulty root>/<language dir>/<n>.exercises`` as ``solution.json``,
    ``config.json`` and ``solution.md``, where ``n`` counts from 1 separately
    for every (language, difficulty) pair. The destination directory is
    printed for each record.

    The process exits via ``sys.exit`` when a record carries an unknown
    difficulty label.
    """
    for language in ('cpp', 'java', 'python'):
        answer_dir = f'data_backup/{language}_code_json'
        counters = {}  # difficulty label -> exercises written so far
        for file_path in get_files_path(answer_dir, '.json'):
            data = load_json(file_path)
            if data['status'] == 0:
                continue
            difficulty = data['difficulty']
            solution_md_data = _render_template_md(
                data['question_title'], data['question_content'],
                language, data[language],
            )
            solution_json_data = _new_solution_meta('csdn.net', data['keywords'])
            config_data = _new_config()

            language_root = _language_root(difficulty, language)
            counters[difficulty] = counters.get(difficulty, 0) + 1
            exercises_dir = os.path.join(
                language_root, str(counters[difficulty]) + '.exercises')
            print(exercises_dir)
            _write_exercise(exercises_dir, solution_json_data, config_data,
                            solution_md_data)


def classify_leetcode():
    """Append the leetcode exercises to the ``data/`` tree.

    Every ``.json`` file under ``data_backup/leetcode`` is read once. Records
    whose ``status`` is ``0`` are skipped. For each of ``java``, ``python``
    and ``cpp`` a new ``<n>.exercises`` directory is created under
    ``<difficulty root>/<language dir>``, where ``n`` is one more than the
    number of sub-directories already there. The language directory is
    printed for each write.

    The author recorded in ``solution.json`` is ``'csdn.net'`` for java and
    ``data['license'][language]`` for the other languages.

    The process exits via ``sys.exit`` when a record carries an unknown
    difficulty label.
    """
    leetcode_dir = 'data_backup/leetcode'
    for file_path in get_files_path(leetcode_dir, '.json'):
        # Load once per file; the content does not depend on the language.
        data = load_json(file_path)
        if data['status'] == 0:
            continue
        for language in ['java', 'python', 'cpp']:
            difficulty = data['difficulty']
            license_info = data['license'][language]
            author = 'csdn.net' if language == 'java' else license_info
            solution_md_data = _render_template_md(
                data['question_title'], data['question_content'],
                language, data[language],
            )
            solution_json_data = _new_solution_meta(author, data['keywords'])
            config_data = _new_config()

            current_dst_dir = _language_root(difficulty, language)
            print(current_dst_dir)
            # The target tree may not exist yet on a fresh checkout.
            os.makedirs(current_dst_dir, exist_ok=True)
            existing_dirs = [
                name for name in os.listdir(current_dst_dir)
                if os.path.isdir(os.path.join(current_dst_dir, name))
            ]
            number = len(existing_dirs) + 1
            dst_dir = os.path.join(current_dst_dir, str(number) + '.exercises')
            # If the numbering has gaps, count + 1 may already be taken;
            # move on to the next free number rather than overwrite it.
            while os.path.exists(dst_dir):
                number += 1
                dst_dir = os.path.join(current_dst_dir, str(number) + '.exercises')
            _write_exercise(dst_dir, solution_json_data, config_data,
                            solution_md_data)


def rename_dir():
    """Print the entries of every ``<difficulty root>/<language dir>``.

    The loop also computes the old path and the sequential
    ``<idx + 1>.exercises`` path for each entry, but the rename itself is
    disabled, so this function never modifies the file system. It raises
    ``FileNotFoundError`` (from ``os.listdir``) when a directory is missing.
    """
    dirs = ['data/1.dailycode初阶', 'data/2.dailycode中阶', 'data/3.dailycode高阶']
    for level_dir in dirs:
        for language_name in ['1.cpp', '2.java', '3.python']:
            languages_dir = os.path.join(level_dir, language_name)
            exercises_dirs = os.listdir(languages_dir)
            print(exercises_dirs)
            for idx, exer in enumerate(exercises_dirs):
                exercises_dir = os.path.join(languages_dir, exer)
                new_exercises_dir = os.path.join(
                    languages_dir, '{}.exercises'.format(idx + 1))
                # NOTE(review): the rename is left disabled, as in the
                # original script. os.listdir order is arbitrary, so enabling
                # it as-is could move one entry onto another existing one.
                # os.rename(exercises_dir, new_exercises_dir)


def _section(text, heading, source):
    """Return the stripped body of the ``## <heading>`` section of ``text``.

    Raises:
        ValueError: if the section is missing or is the last one in the file.
    """
    match = re.search(rf'## {heading}\n(.*?)\n##', text, re.S)
    if match is None:
        raise ValueError(f"section '## {heading}' not found in {source}")
    return match.group(1).strip()


def _parse_it_knowledge(text, source):
    """Parse one IT-knowledge Markdown document into a dict of its fields."""
    answer_match = re.search(r'## 答案\n(.*)', text, re.S)
    if answer_match is None:
        raise ValueError(f"section '## 答案' not found in {source}")
    # Strip each option first, so whitespace-only lines are dropped instead
    # of becoming empty choices.
    stripped_lines = (line.strip() for line in _section(text, '选项', source).split('\n'))
    return {
        'title': _section(text, '标题', source),
        'content': _section(text, '描述', source),
        # Keywords are ';'-separated in the source and ','-separated here.
        'keywords': _section(text, '关键词', source).replace(';', ','),
        'topic_link': _section(text, '链接', source),
        'choices': [line for line in stripped_lines if line],
        'answer': answer_match.group(1).strip(),
    }


def extract_it_knowledge():
    """Print the ``solution.md`` rendering of every IT-knowledge question.

    Every ``.md`` file under ``data_backup/it_knowledge`` is parsed for its
    ``标题``, ``描述``, ``关键词``, ``链接``, ``选项`` and ``答案`` sections.
    For each file the path is printed, followed by a Markdown document that
    holds the answer and the first three options that differ from it. Nothing
    is written to disk.

    Raises:
        ValueError: if a section is missing, the answer is not one of the
            listed options, or fewer than three wrong options remain.
    """
    data_dir = 'data_backup/it_knowledge'
    language = 'json'
    for file in get_files_path(data_dir, '.md'):
        with open(file, 'r', encoding='utf-8') as f:
            data = f.read()
        record = _parse_it_knowledge(data, file)
        print(file)

        answer = record['answer']
        choices = record['choices']
        if answer not in choices:
            raise ValueError(f"answer {answer!r} is not among the options in {file}")
        distractors = [choice for choice in choices if choice != answer]
        if len(distractors) < 3:
            raise ValueError(f"fewer than three wrong options in {file}")

        solution_md_data = _render_choice_md(
            record['title'], record['content'], language, answer, distractors)
        print(solution_md_data)


# Run the leetcode import only when executed as a script, so that importing
# this module does not touch the file system.
if __name__ == '__main__':
    classify_leetcode()