def add_skilltree_leaf_link():
    """Write the best-matching skill-tree link into each exercise's solution.json.

    Reads ``tree_algorithm_view.xlsx`` from the current working directory and
    builds one candidate per spreadsheet row:

    * match text: ``"<column 2>-<column 3>"`` (0-based column positions;
      presumably parent-node and leaf-node names -- confirm against the sheet),
    * target: column 3,
    * link: ``https://edu.csdn.net/skill/algorithm/<column 4>``.

    For every exercise directory under ``data/<level>/<language>/`` the title
    is taken from the leading ``# `` heading of ``solution.md``, scored against
    every candidate with ``fuzz.partial_ratio``, and the link of the first
    highest-scoring candidate is stored under the ``skilltree_link`` key of the
    exercise's ``solution.json``.

    Exercise directories without a ``solution.md``, or whose ``solution.md``
    does not start with a ``# `` heading, are reported and skipped instead of
    aborting the whole run. If the spreadsheet has no rows, nothing is written.

    Returns:
        None. The function rewrites ``solution.json`` files in place and
        prints one progress record per exercise.
    """
    # Imported here so the function carries its own third-party dependencies.
    import pandas as pd
    from fuzzywuzzy import fuzz

    link_prefix = 'https://edu.csdn.net/skill/algorithm/{}'
    level_dirs = ['data/1.dailycode初阶', 'data/2.dailycode中阶', 'data/3.dailycode高阶']
    languages = ['1.cpp', '2.java', '3.python']

    data = pd.read_excel('tree_algorithm_view.xlsx')

    # Build the candidates once; they do not depend on the exercise being
    # processed. Plain tuples give true positional access, which avoids the
    # deprecated "integer key on a labelled Series" lookup.
    candidates = [
        ('{}-{}'.format(row[2], row[3]), row[3], link_prefix.format(row[4]))
        for row in data.itertuples(index=False, name=None)
    ]
    if not candidates:
        print("tree_algorithm_view.xlsx contains no rows; nothing to link")
        return

    for level_dir in level_dirs:
        for language in languages:
            dest_dir = os.path.join(level_dir, language)
            for entry in os.listdir(dest_dir):
                exercise_dir = os.path.join(dest_dir, entry)
                if not os.path.isdir(exercise_dir):
                    continue

                solution_md_path = os.path.join(exercise_dir, 'solution.md')
                if not os.path.isfile(solution_md_path):
                    print("skip (no solution.md): ", exercise_dir)
                    continue

                # The exercise text is not ASCII-only, so do not rely on the
                # platform default encoding.
                with open(solution_md_path, 'r', encoding='utf-8') as f:
                    solution_md_data = f.read()

                # The title is the '# ' heading on the very first line.
                title_match = re.match(r'# (.*)\n', solution_md_data)
                if title_match is None:
                    print("skip (no title heading): ", solution_md_path)
                    continue
                title = title_match.group(1)

                scores = [fuzz.partial_ratio(title, text) for text, _, _ in candidates]
                max_score = max(scores)
                # list.index returns the first maximum, so ties resolve to the
                # earliest spreadsheet row.
                _, target, link = candidates[scores.index(max_score)]

                solution_json_path = os.path.join(exercise_dir, 'solution.json')
                solution_json_data = load_json(solution_json_path)
                solution_json_data['skilltree_link'] = link
                dump_json(solution_json_path, solution_json_data)

                print("query: ", title, "target: ", target, "max_score: ", max_score)
                print(link)


# Script entry: the helper is run directly, so the call stays at module level.
add_skilltree_leaf_link()