diff --git "a/data/1.AI\345\210\235\351\230\266/1.\351\242\204\345\244\207\347\237\245\350\257\206/1.AI\347\256\200\344\273\213/config.json" "b/data/1.AI\345\210\235\351\230\266/1.\351\242\204\345\244\207\347\237\245\350\257\206/1.AI\347\256\200\344\273\213/config.json" index e3c3ce2b8923f0c833d2887706b7bce53b8c3f38..57f183d90573d553d0c7fd108c6d108d23ab9275 100644 --- "a/data/1.AI\345\210\235\351\230\266/1.\351\242\204\345\244\207\347\237\245\350\257\206/1.AI\347\256\200\344\273\213/config.json" +++ "b/data/1.AI\345\210\235\351\230\266/1.\351\242\204\345\244\207\347\237\245\350\257\206/1.AI\347\256\200\344\273\213/config.json" @@ -1,18 +1,23 @@ { - "node_id": "ai-3387d5d7a7684fbb9187e26d6d8d187b", - "keywords": [], - "children": [ - { - "AI简史": { - "keywords": [ - "AI起源", - "人工智能简史" - ], - "children": [] - } - } - ], - "export": [ - "helloworld.json" - ] + "node_id": "ai-3387d5d7a7684fbb9187e26d6d8d187b", + "keywords": [], + "children": [ + { + "AI简史": { + "keywords": [ + "AI起源", + "人工智能简史" + ], + "children": [], + "keywords_must": [], + "keywords_forbid": [], + "node_id": "ai-a0605ecbad3741169541ebc6ce1b0d13" + } + } + ], + "export": [ + "helloworld.json" + ], + "keywords_must": [], + "keywords_forbid": [] } \ No newline at end of file diff --git "a/data/1.AI\345\210\235\351\230\266/1.\351\242\204\345\244\207\347\237\245\350\257\206/1.AI\347\256\200\344\273\213/helloworld.json" "b/data/1.AI\345\210\235\351\230\266/1.\351\242\204\345\244\207\347\237\245\350\257\206/1.AI\347\256\200\344\273\213/helloworld.json" index 8bb392f99f3c9a42e20281f66781cffb45750e95..3169a2ccc58c7fa24e4789c8b9db0f1fdd578fd7 100644 --- "a/data/1.AI\345\210\235\351\230\266/1.\351\242\204\345\244\207\347\237\245\350\257\206/1.AI\347\256\200\344\273\213/helloworld.json" +++ "b/data/1.AI\345\210\235\351\230\266/1.\351\242\204\345\244\207\347\237\245\350\257\206/1.AI\347\256\200\344\273\213/helloworld.json" @@ -1,6 +1,7 @@ { - "type": "code_options", - "author": "幻灰龙", - "source": "helloworld.md", - "notebook_enable": true + "type": "code_options", + "author": "幻灰龙", + "source": "helloworld.md", + "notebook_enable": true, + "exercise_id": "4b706cfc1b5a48ae8d5865bbcec074ec" } \ No newline at end of file diff --git "a/data/1.AI\345\210\235\351\230\266/1.\351\242\204\345\244\207\347\237\245\350\257\206/2.\347\272\277\346\200\247\345\217\215\345\220\221\344\274\240\346\222\255/config.json" "b/data/1.AI\345\210\235\351\230\266/1.\351\242\204\345\244\207\347\237\245\350\257\206/2.\347\272\277\346\200\247\345\217\215\345\220\221\344\274\240\346\222\255/config.json" index bad5017eda2019ee3c11a93c7d1aeb37c8ed4918..5a1a7c1a8eb55a0bf85ea795455d32a344264041 100644 --- "a/data/1.AI\345\210\235\351\230\266/1.\351\242\204\345\244\207\347\237\245\350\257\206/2.\347\272\277\346\200\247\345\217\215\345\220\221\344\274\240\346\222\255/config.json" +++ "b/data/1.AI\345\210\235\351\230\266/1.\351\242\204\345\244\207\347\237\245\350\257\206/2.\347\272\277\346\200\247\345\217\215\345\220\221\344\274\240\346\222\255/config.json" @@ -2,5 +2,7 @@ "node_id": "ai-861408a897f042fd8044bfc9838d2747", "keywords": [], "children": [], - "export": [] + "export": [], + "keywords_must": [], + "keywords_forbid": [] } \ No newline at end of file diff --git "a/data/1.AI\345\210\235\351\230\266/1.\351\242\204\345\244\207\347\237\245\350\257\206/3.\346\242\257\345\272\246\344\270\213\351\231\215/config.json" "b/data/1.AI\345\210\235\351\230\266/1.\351\242\204\345\244\207\347\237\245\350\257\206/3.\346\242\257\345\272\246\344\270\213\351\231\215/config.json" index a63ae81590b06257c4109649ae19dde80a3432fa..cbbe6a9614bceea31c9eca323da3ba596beea1b3 100644 --- "a/data/1.AI\345\210\235\351\230\266/1.\351\242\204\345\244\207\347\237\245\350\257\206/3.\346\242\257\345\272\246\344\270\213\351\231\215/config.json" +++ "b/data/1.AI\345\210\235\351\230\266/1.\351\242\204\345\244\207\347\237\245\350\257\206/3.\346\242\257\345\272\246\344\270\213\351\231\215/config.json" @@ -2,5 +2,7 @@ "node_id": "ai-8deab4930eef40b0bd9c2337e7ad5c51", "keywords": [], "children": [], - "export": [] + "export": [], + "keywords_must": [], + "keywords_forbid": [] } \ No newline at end of file diff --git "a/data/1.AI\345\210\235\351\230\266/1.\351\242\204\345\244\207\347\237\245\350\257\206/config.json" "b/data/1.AI\345\210\235\351\230\266/1.\351\242\204\345\244\207\347\237\245\350\257\206/config.json" index 27cba2afa1b10e8febc074b9f462598d7e5e15ae..39fbeb7b4c8d4db569d7745044d98cff1a8e6842 100644 --- "a/data/1.AI\345\210\235\351\230\266/1.\351\242\204\345\244\207\347\237\245\350\257\206/config.json" +++ "b/data/1.AI\345\210\235\351\230\266/1.\351\242\204\345\244\207\347\237\245\350\257\206/config.json" @@ -1,4 +1,6 @@ { "node_id": "ai-bc6f05e925e147fd8fca53041f70e022", - "keywords": [] + "keywords": [], + "keywords_must": [], + "keywords_forbid": [] } \ No newline at end of file diff --git "a/data/1.AI\345\210\235\351\230\266/2.\347\272\277\346\200\247\345\233\236\345\275\222/config.json" "b/data/1.AI\345\210\235\351\230\266/2.\347\272\277\346\200\247\345\233\236\345\275\222/config.json" index f3bece3f2cdc708e64d56d9d0849ff3b9382e738..5275de86ebad1a8ba65daa02f8ee150bc2e29c4a 100644 --- "a/data/1.AI\345\210\235\351\230\266/2.\347\272\277\346\200\247\345\233\236\345\275\222/config.json" +++ "b/data/1.AI\345\210\235\351\230\266/2.\347\272\277\346\200\247\345\233\236\345\275\222/config.json" @@ -1,4 +1,6 @@ { "node_id": "ai-f51cf279b2c94e099da0f3e1fcfc793e", - "keywords": [] + "keywords": [], + "keywords_must": [], + "keywords_forbid": [] } \ No newline at end of file diff --git "a/data/1.AI\345\210\235\351\230\266/3.\347\272\277\346\200\247\345\210\206\347\261\273/config.json" "b/data/1.AI\345\210\235\351\230\266/3.\347\272\277\346\200\247\345\210\206\347\261\273/config.json" index cead11da45b77cdb3c23a7b5d6354bb3810f574a..ef5f2a0b0f9a340e76d409740c982a6474e283f7 100644 --- "a/data/1.AI\345\210\235\351\230\266/3.\347\272\277\346\200\247\345\210\206\347\261\273/config.json" +++ "b/data/1.AI\345\210\235\351\230\266/3.\347\272\277\346\200\247\345\210\206\347\261\273/config.json" @@ -1,4 +1,6 @@ { "node_id": "ai-d7c91624cb92446786eeaad0cd336445", - "keywords": [] + "keywords": [], + "keywords_must": [], + "keywords_forbid": [] } \ No newline at end of file diff --git "a/data/1.AI\345\210\235\351\230\266/config.json" "b/data/1.AI\345\210\235\351\230\266/config.json" index bc8440d21eaba2865acb2981b412cfb00d1b20cc..a877b6d1cda822e9e1831d87f960220b7540d6fa 100644 --- "a/data/1.AI\345\210\235\351\230\266/config.json" +++ "b/data/1.AI\345\210\235\351\230\266/config.json" @@ -1,4 +1,6 @@ { "node_id": "ai-7c98592cf49347b69cc10b653731bd16", - "keywords": [] + "keywords": [], + "keywords_must": [], + "keywords_forbid": [] } \ No newline at end of file diff --git "a/data/2.AI\344\270\255\351\230\266/config.json" "b/data/2.AI\344\270\255\351\230\266/config.json" index a039167ba39b6189ed1c3d08cc6c9473846ff104..92076b5e3429337bc1174cb27d0ff0cbfb837060 100644 --- "a/data/2.AI\344\270\255\351\230\266/config.json" +++ "b/data/2.AI\344\270\255\351\230\266/config.json" @@ -1,4 +1,6 @@ { "node_id": "ai-8b462755b2014f90bff16ec87d2fb84c", - "keywords": [] + "keywords": [], + "keywords_must": [], + "keywords_forbid": [] } \ No newline at end of file diff --git "a/data/3.AI\351\253\230\351\230\266/config.json" "b/data/3.AI\351\253\230\351\230\266/config.json" index 738807655bfb8a3485436909a5ed0ab4038ae97b..3a255e61f2ff6d94ed1bf98166a62128a20f3c3b 100644 --- "a/data/3.AI\351\253\230\351\230\266/config.json" +++ "b/data/3.AI\351\253\230\351\230\266/config.json" @@ -1,4 +1,6 @@ { "node_id": "ai-de60cc83f32541499c62e182ac952d83", - "keywords": [] + "keywords": [], + "keywords_must": [], + "keywords_forbid": [] } \ No newline at end of file diff --git a/data/config.json b/data/config.json index 1ce16d7fed9f5c967175883269afbbd1a6e87dfb..e5c2376c4e5e8f6743353a88fe7e3aa08a978a27 100644 --- a/data/config.json +++ b/data/config.json @@ -1,5 +1,7 @@ { "tree_name": "ai", "keywords": [], - "node_id": "ai-e199f3e521db4347a8bc662f8f33ca6c" + "node_id": "ai-e199f3e521db4347a8bc662f8f33ca6c", + "keywords_must": [], + "keywords_forbid": [] } \ No newline at end of file diff --git a/data/tree.json b/data/tree.json index c952422bd4e278ccab46edd2c18419b961ba5bc0..a75d0ccb29c08bf2959e431f04f805df9ff1ff70 100644 --- a/data/tree.json +++ b/data/tree.json @@ -17,57 +17,90 @@ "AI简介": { "node_id": "ai-3387d5d7a7684fbb9187e26d6d8d187b", "keywords": [], - "children": [] + "children": [ + { + "AI简史": { + "keywords": [ + "AI起源", + "人工智能简史" + ], + "children": [], + "keywords_must": [], + "keywords_forbid": [], + "node_id": "ai-a0605ecbad3741169541ebc6ce1b0d13" + } + } + ], + "keywords_must": [], + "keywords_forbid": [] } }, { "线性反向传播": { "node_id": "ai-861408a897f042fd8044bfc9838d2747", "keywords": [], - "children": [] + "children": [], + "keywords_must": [], + "keywords_forbid": [] } }, { "梯度下降": { "node_id": "ai-8deab4930eef40b0bd9c2337e7ad5c51", "keywords": [], - "children": [] + "children": [], + "keywords_must": [], + "keywords_forbid": [] } } - ] + ], + "keywords_must": [], + "keywords_forbid": [] } }, { "线性回归": { "node_id": "ai-f51cf279b2c94e099da0f3e1fcfc793e", "keywords": [], - "children": [] + "children": [], + "keywords_must": [], + "keywords_forbid": [] } }, { "线性分类": { "node_id": "ai-d7c91624cb92446786eeaad0cd336445", "keywords": [], - "children": [] + "children": [], + "keywords_must": [], + "keywords_forbid": [] } } - ] + ], + "keywords_must": [], + "keywords_forbid": [] } }, { "AI中阶": { "node_id": "ai-8b462755b2014f90bff16ec87d2fb84c", "keywords": [], - "children": [] + "children": [], + "keywords_must": [], + "keywords_forbid": [] } }, { "AI高阶": { "node_id": "ai-de60cc83f32541499c62e182ac952d83", "keywords": [], - "children": [] + "children": [], + "keywords_must": [], + "keywords_forbid": [] } } - ] + ], + "keywords_must": [], + "keywords_forbid": [] } } \ No newline at end of file diff --git a/src/tree.py b/src/tree.py index 95e1cbc5812810e30465eccc2632464caa5438e7..0c58d32b0da0e7099c2d2804fc97cdc35b240e28 100644 --- a/src/tree.py +++ b/src/tree.py @@ -1,10 +1,10 @@ -# -*- coding: utf-8 -*- -import logging -from genericpath import exists import json +import logging import os -import uuid +import re +import subprocess import sys +import uuid import re id_set = set() @@ -16,8 +16,29 @@ handler.setFormatter(formatter) logger.addHandler(handler) +def search_author(author_dict, username): + for key in author_dict: + names = author_dict[key] + if username in names: + return key + return username + + +def user_name(md_file, author_dict): + ret = subprocess.Popen([ + "git", "log", md_file + ], stdout=subprocess.PIPE) + lines = list(map(lambda l: l.decode(), ret.stdout.readlines())) + author_lines = [] + for line in lines: + if line.startswith('Author'): + author_lines.append(line.split(' ')[1]) + author_nick_name = author_lines[-1] + return search_author(author_dict, author_nick_name) + + def load_json(p): - with open(p, 'r', encoding='utf-8') as f: + with open(p, 'r', encoding="utf-8") as f: return json.loads(f.read()) @@ -30,7 +51,7 @@ def dump_json(p, j, exist_ok=False, override=False): logger.error(f"{p} already exist") sys.exit(0) - with open(p, 'w+', encoding='utf-8') as f: + with open(p, 'w+', encoding="utf8") as f: f.write(json.dumps(j, indent=2, ensure_ascii=False)) @@ -72,7 +93,18 @@ def check_export(base, cfg): class TreeWalker: - def __init__(self, root, tree_name, title=None, log=None): + def __init__( + self, root, + tree_name, + title=None, + log=None, + authors=None, + enable_notebook=None, + ignore_keywords=False + ): + self.ignore_keywords = ignore_keywords + self.authors = authors if authors else {} + self.enable_notebook = enable_notebook self.name = tree_name self.root = root self.title = tree_name if title is None else title @@ -84,7 +116,9 @@ class TreeWalker: root_node = { "node_id": root["node_id"], "keywords": root["keywords"], - "children": [] + "children": [], + "keywords_must": root["keywords_must"], + "keywords_forbid": root["keywords_forbid"] } self.tree[root["tree_name"]] = root_node self.load_levels(root_node) @@ -92,25 +126,31 @@ class TreeWalker: for index, level in enumerate(root_node["children"]): level_title = list(level.keys())[0] level_node = list(level.values())[0] - level_path = os.path.join(self.root, f"{index+1}.{level_title}") + level_path = os.path.join(self.root, f"{index + 1}.{level_title}") self.load_chapters(level_path, level_node) for index, chapter in enumerate(level_node["children"]): chapter_title = list(chapter.keys())[0] chapter_node = list(chapter.values())[0] chapter_path = os.path.join( - level_path, f"{index+1}.{chapter_title}") + level_path, f"{index + 1}.{chapter_title}") self.load_sections(chapter_path, chapter_node) for index, section_node in enumerate(chapter_node["children"]): section_title = list(section_node.keys())[0] full_path = os.path.join( - chapter_path, f"{index}.{section_title}") + chapter_path, f"{index + 1}.{section_title}") if os.path.isdir(full_path): + self.check_section_keywords(full_path) self.ensure_exercises(full_path) tree_path = os.path.join(self.root, "tree.json") dump_json(tree_path, self.tree, exist_ok=True, override=True) return self.tree + def sort_dir_list(self, dirs): + result = [self.extract_node_env(dir) for dir in dirs] + result.sort(key=lambda item: item[0]) + return result + def load_levels(self, root_node): levels = [] for level in os.listdir(self.root): @@ -133,6 +173,8 @@ class TreeWalker: "node_id": config["node_id"], "keywords": config["keywords"], "children": [], + "keywords_must": config["keywords_must"], + "keywords_forbid": config["keywords_forbid"] } } @@ -167,7 +209,7 @@ class TreeWalker: for index, [number, element] in enumerate(children): title = list(element.keys())[0] origin = os.path.join(base, f"{number}.{title}") - posted = os.path.join(base, f"{index+1}.{title}") + posted = os.path.join(base, f"{index + 1}.{title}") if origin != posted: self.logger.info(f"rename [{origin}] to [{posted}]") os.rename(origin, posted) @@ -184,6 +226,8 @@ class TreeWalker: "tree_name": self.name, "keywords": [], "node_id": self.gen_node_id(), + "keywords_must": [], + "keywords_forbid": [] } dump_json(config_path, config, exist_ok=True, override=True) else: @@ -213,7 +257,9 @@ class TreeWalker: if not os.path.exists(config_path): config = { "node_id": self.gen_node_id(), - "keywords": [] + "keywords": [], + "keywords_must": [], + "keywords_forbid": [] } dump_json(config_path, config, exist_ok=True, override=True) else: @@ -237,15 +283,25 @@ class TreeWalker: config = load_json(config_path) flag, result = self.ensure_node_id(config) if flag: - dump_json(config_path, config, exist_ok=True, override=True) + dump_json(config_path, result, exist_ok=True, override=True) return config def ensure_node_id(self, config): - if "node_id" not in config: - config["node_id"] = self.gen_node_id() - return True, config - else: - return False, config + flag = False + if "node_id" not in config or \ + not config["node_id"].startswith(f"{self.name}-") or \ + config["node_id"] in id_set: + new_id = self.gen_node_id() + id_set.add(new_id) + config["node_id"] = new_id + flag = True + + for child in config.get("children", []): + child_node = list(child.values())[0] + f, _ = self.ensure_node_id(child_node) + flag = flag or f + + return flag, config def gen_node_id(self): return f"{self.name}-{uuid.uuid4().hex}" @@ -258,7 +314,8 @@ class TreeWalker: return int(number), title except Exception as error: self.logger.error(f"目录 [{path}] 解析失败,结构不合法,可能是缺少序号") - sys.exit(1) + # sys.exit(1) + raise error def load_chapter_node(self, full_name): config = self.ensure_chapter_config(full_name) @@ -268,6 +325,8 @@ class TreeWalker: "node_id": config["node_id"], "keywords": config["keywords"], "children": [], + "keywords_must": config["keywords_must"], + "keywords_forbid": config["keywords_forbid"] } } return num, result @@ -279,7 +338,9 @@ class TreeWalker: name: { "node_id": config["node_id"], "keywords": config["keywords"], - "children": config.get("children", []) + "children": config.get("children", []), + "keywords_must": config["keywords_must"], + "keywords_forbid": config["keywords_forbid"] } } # if "children" in config: @@ -288,9 +349,77 @@ class TreeWalker: def ensure_exercises(self, section_path): config = self.ensure_section_config(section_path) + flag = False + for e in os.listdir(section_path): + base, ext = os.path.splitext(e) + _, source = os.path.split(e) + if ext != ".md": + continue + mfile = base + ".json" + meta_path = os.path.join(section_path, mfile) + md_file = os.path.join(section_path, e) + self.ensure_exercises_meta(meta_path, source, md_file) + export = config.get("export", []) + if mfile not in export and self.name != "algorithm": + export.append(mfile) + flag = True + config["export"] = export + + if flag: + dump_json(os.path.join(section_path, "config.json"), + config, True, True) + for e in config.get("export", []): full_name = os.path.join(section_path, e) exercise = load_json(full_name) - if "exercise_id" not in exercise: - exercise["exercise_id"] = uuid.uuid4().hex - dump_json(full_name, exercise) + if "exercise_id" not in exercise or exercise.get("exercise_id") in id_set: + eid = uuid.uuid4().hex + exercise["exercise_id"] = eid + dump_json(full_name, exercise, True, True) + else: + id_set.add(exercise["exercise_id"]) + + def ensure_exercises_meta(self, meta_path, source, md_file): + _, mfile = os.path.split(meta_path) + meta = None + if os.path.exists(meta_path): + with open(meta_path) as f: + content = f.read() + if content: + meta = json.loads(content) + if "exercise_id" not in meta: + meta["exercise_id"] = uuid.uuid4().hex + if "notebook_enable" not in meta: + meta["notebook_enable"] = self.default_notebook() + if "source" not in meta: + meta["source"] = source + if "author" not in meta: + meta["author"] = user_name(md_file, self.authors) + if "type" not in meta: + meta["type"] = "code_options" + + if meta is None: + meta = { + "type": "code_options", + "author": user_name(md_file, self.authors), + "source": source, + "notebook_enable": self.default_notebook(), + "exercise_id": uuid.uuid4().hex + } + dump_json(meta_path, meta, True, True) + + def default_notebook(self): + if self.enable_notebook is not None: + return self.enable_notebook + if self.name in ["python", "java", "c"]: + return True + else: + return False + + def check_section_keywords(self, full_path): + if self.ignore_keywords: + return + config = self.ensure_section_config(full_path) + if not config.get("keywords", []): + self.logger.error(f"节点 [{full_path}] 的关键字为空,请修改配置文件写入关键字") + sys.exit(1)