add keywords_must and keywords_forbid

06f28fbd · luxin · a72df605 · 06f28fbd · 06f28fbd · 06f28fbd
13 changed files
--- a/data/1.AI初阶/1.预备知识/1.AI简介/config.json
+++ b/data/1.AI初阶/1.预备知识/1.AI简介/config.json
 {
-    "node_id": "ai-3387d5d7a7684fbb9187e26d6d8d187b",
-    "keywords": [],
-    "children": [
-        {
-            "AI简史": {
-                "keywords": [
-                    "AI起源",
-                    "人工智能简史"
-                ],
-                "children": []
-            }
-        }
-    ],
-    "export": [
-        "helloworld.json"
-    ]
+  "node_id": "ai-3387d5d7a7684fbb9187e26d6d8d187b",
+  "keywords": [],
+  "children": [
+    {
+      "AI简史": {
+        "keywords": [
+          "AI起源",
+          "人工智能简史"
+        ],
+        "children": [],
+        "keywords_must": [],
+        "keywords_forbid": [],
+        "node_id": "ai-a0605ecbad3741169541ebc6ce1b0d13"
+      }
+    }
+  ],
+  "export": [
+    "helloworld.json"
+  ],
+  "keywords_must": [],
+  "keywords_forbid": []
 }
\ No newline at end of file
--- a/data/1.AI初阶/1.预备知识/1.AI简介/helloworld.json
+++ b/data/1.AI初阶/1.预备知识/1.AI简介/helloworld.json
 {
-    "type": "code_options",
-    "author": "幻灰龙",
-    "source": "helloworld.md",
-    "notebook_enable": true
+  "type": "code_options",
+  "author": "幻灰龙",
+  "source": "helloworld.md",
+  "notebook_enable": true,
+  "exercise_id": "4b706cfc1b5a48ae8d5865bbcec074ec"
 }
\ No newline at end of file
--- a/data/1.AI初阶/1.预备知识/2.线性反向传播/config.json
+++ b/data/1.AI初阶/1.预备知识/2.线性反向传播/config.json
@@ -2,5 +2,7 @@
  "node_id": "ai-861408a897f042fd8044bfc9838d2747",
  "keywords": [],
  "children": [],
-  "export": []
+  "export": [],
+  "keywords_must": [],
+  "keywords_forbid": []
 }
\ No newline at end of file
--- a/data/1.AI初阶/1.预备知识/3.梯度下降/config.json
+++ b/data/1.AI初阶/1.预备知识/3.梯度下降/config.json
@@ -2,5 +2,7 @@
  "node_id": "ai-8deab4930eef40b0bd9c2337e7ad5c51",
  "keywords": [],
  "children": [],
-  "export": []
+  "export": [],
+  "keywords_must": [],
+  "keywords_forbid": []
 }
\ No newline at end of file
--- a/data/1.AI初阶/1.预备知识/config.json
+++ b/data/1.AI初阶/1.预备知识/config.json
 {
  "node_id": "ai-bc6f05e925e147fd8fca53041f70e022",
-  "keywords": []
+  "keywords": [],
+  "keywords_must": [],
+  "keywords_forbid": []
 }
\ No newline at end of file
--- a/data/1.AI初阶/2.线性回归/config.json
+++ b/data/1.AI初阶/2.线性回归/config.json
 {
  "node_id": "ai-f51cf279b2c94e099da0f3e1fcfc793e",
-  "keywords": []
+  "keywords": [],
+  "keywords_must": [],
+  "keywords_forbid": []
 }
\ No newline at end of file
--- a/data/1.AI初阶/3.线性分类/config.json
+++ b/data/1.AI初阶/3.线性分类/config.json
 {
  "node_id": "ai-d7c91624cb92446786eeaad0cd336445",
-  "keywords": []
+  "keywords": [],
+  "keywords_must": [],
+  "keywords_forbid": []
 }
\ No newline at end of file
--- a/data/1.AI初阶/config.json
+++ b/data/1.AI初阶/config.json
 {
  "node_id": "ai-7c98592cf49347b69cc10b653731bd16",
-  "keywords": []
+  "keywords": [],
+  "keywords_must": [],
+  "keywords_forbid": []
 }
\ No newline at end of file
--- a/data/2.AI中阶/config.json
+++ b/data/2.AI中阶/config.json
 {
  "node_id": "ai-8b462755b2014f90bff16ec87d2fb84c",
-  "keywords": []
+  "keywords": [],
+  "keywords_must": [],
+  "keywords_forbid": []
 }
\ No newline at end of file
--- a/data/3.AI高阶/config.json
+++ b/data/3.AI高阶/config.json
 {
  "node_id": "ai-de60cc83f32541499c62e182ac952d83",
-  "keywords": []
+  "keywords": [],
+  "keywords_must": [],
+  "keywords_forbid": []
 }
\ No newline at end of file
--- a/data/config.json
+++ b/data/config.json
 {
  "tree_name": "ai",
  "keywords": [],
-  "node_id": "ai-e199f3e521db4347a8bc662f8f33ca6c"
+  "node_id": "ai-e199f3e521db4347a8bc662f8f33ca6c",
+  "keywords_must": [],
+  "keywords_forbid": []
 }
\ No newline at end of file
--- a/data/tree.json
+++ b/data/tree.json
@@ -17,57 +17,90 @@
                    "AI简介": {
                      "node_id": "ai-3387d5d7a7684fbb9187e26d6d8d187b",
                      "keywords": [],
-                      "children": []
+                      "children": [
+                        {
+                          "AI简史": {
+                            "keywords": [
+                              "AI起源",
+                              "人工智能简史"
+                            ],
+                            "children": [],
+                            "keywords_must": [],
+                            "keywords_forbid": [],
+                            "node_id": "ai-a0605ecbad3741169541ebc6ce1b0d13"
+                          }
+                        }
+                      ],
+                      "keywords_must": [],
+                      "keywords_forbid": []
                    }
                  },
                  {
                    "线性反向传播": {
                      "node_id": "ai-861408a897f042fd8044bfc9838d2747",
                      "keywords": [],
-                      "children": []
+                      "children": [],
+                      "keywords_must": [],
+                      "keywords_forbid": []
                    }
                  },
                  {
                    "梯度下降": {
                      "node_id": "ai-8deab4930eef40b0bd9c2337e7ad5c51",
                      "keywords": [],
-                      "children": []
+                      "children": [],
+                      "keywords_must": [],
+                      "keywords_forbid": []
                    }
                  }
-                ]
+                ],
+                "keywords_must": [],
+                "keywords_forbid": []
              }
            },
            {
              "线性回归": {
                "node_id": "ai-f51cf279b2c94e099da0f3e1fcfc793e",
                "keywords": [],
-                "children": []
+                "children": [],
+                "keywords_must": [],
+                "keywords_forbid": []
              }
            },
            {
              "线性分类": {
                "node_id": "ai-d7c91624cb92446786eeaad0cd336445",
                "keywords": [],
-                "children": []
+                "children": [],
+                "keywords_must": [],
+                "keywords_forbid": []
              }
            }
-          ]
+          ],
+          "keywords_must": [],
+          "keywords_forbid": []
        }
      },
      {
        "AI中阶": {
          "node_id": "ai-8b462755b2014f90bff16ec87d2fb84c",
          "keywords": [],
-          "children": []
+          "children": [],
+          "keywords_must": [],
+          "keywords_forbid": []
        }
      },
      {
        "AI高阶": {
          "node_id": "ai-de60cc83f32541499c62e182ac952d83",
          "keywords": [],
-          "children": []
+          "children": [],
+          "keywords_must": [],
+          "keywords_forbid": []
        }
      }
-    ]
+    ],
+    "keywords_must": [],
+    "keywords_forbid": []
  }
 }
\ No newline at end of file
--- a/src/tree.py
+++ b/src/tree.py
-# -*- coding: utf-8 -*-
-import logging
-from genericpath import exists
 import json
+import logging
 import os
-import uuid
+import re
+import subprocess
 import sys
+import uuid
 import re

 id_set = set()
@@ -16,8 +16,29 @@ handler.setFormatter(formatter)
 logger.addHandler(handler)


+def search_author(author_dict, username):
+    """Map a raw user name to the author key that lists it.
+
+    ``author_dict`` maps an author key to a collection of names. The first
+    key whose collection contains ``username`` is returned; when no entry
+    contains it, ``username`` itself is returned unchanged.
+    """
+    for author_key, known_names in author_dict.items():
+        if username in known_names:
+            return author_key
+    return username
+
+
+def user_name(md_file, author_dict):
+    """Return the author of the last ``Author`` entry in ``git log`` for a file.
+
+    Runs ``git log -- <md_file>``, takes the second space-separated token of
+    every line starting with ``Author``, and resolves the last such token
+    through ``search_author``.
+
+    Args:
+        md_file: path of the file whose history is inspected.
+        author_dict: mapping passed through to ``search_author``.
+
+    Returns:
+        The key from ``author_dict`` that lists the name, or the raw name
+        when no entry lists it.
+
+    Raises:
+        IndexError: when ``git log`` reports no author for ``md_file``
+            (for example the file has never been committed).
+    """
+    # subprocess.run waits for git and closes the pipe; the previous bare
+    # Popen left the child un-reaped and its stdout handle open.
+    # "--" makes git treat md_file strictly as a path, never as a revision.
+    completed = subprocess.run(
+        ["git", "log", "--", md_file],
+        stdout=subprocess.PIPE,
+        check=False,
+    )
+    log_text = completed.stdout.decode("utf-8", errors="replace")
+
+    author_lines = []
+    for line in log_text.splitlines():
+        if line.startswith('Author'):
+            tokens = line.split(' ')
+            if len(tokens) > 1:
+                author_lines.append(tokens[1])
+
+    if not author_lines:
+        # Same exception type as the original empty-list lookup, but with a
+        # message that names the file instead of "list index out of range".
+        raise IndexError(f"no git author found for [{md_file}]")
+
+    return search_author(author_dict, author_lines[-1])
+
+
 def load_json(p):
-    with open(p, 'r', encoding='utf-8') as f:
+    with open(p, 'r', encoding="utf-8") as f:
        return json.loads(f.read())


@@ -30,7 +51,7 @@ def dump_json(p, j, exist_ok=False, override=False):
            logger.error(f"{p} already exist")
            sys.exit(0)

-    with open(p, 'w+', encoding='utf-8') as f:
+    with open(p, 'w+', encoding="utf8") as f:
        f.write(json.dumps(j, indent=2, ensure_ascii=False))


@@ -72,7 +93,18 @@ def check_export(base, cfg):


 class TreeWalker:
-    def __init__(self, root, tree_name, title=None, log=None):
+    def __init__(
+            self, root,
+            tree_name,
+            title=None,
+            log=None,
+            authors=None,
+            enable_notebook=None,
+            ignore_keywords=False
+    ):
+        self.ignore_keywords = ignore_keywords
+        self.authors = authors if authors else {}
+        self.enable_notebook = enable_notebook
        self.name = tree_name
        self.root = root
        self.title = tree_name if title is None else title
@@ -84,7 +116,9 @@ class TreeWalker:
        root_node = {
            "node_id": root["node_id"],
            "keywords": root["keywords"],
-            "children": []
+            "children": [],
+            "keywords_must": root["keywords_must"],
+            "keywords_forbid": root["keywords_forbid"]
        }
        self.tree[root["tree_name"]] = root_node
        self.load_levels(root_node)
@@ -92,25 +126,31 @@ class TreeWalker:
        for index, level in enumerate(root_node["children"]):
            level_title = list(level.keys())[0]
            level_node = list(level.values())[0]
-            level_path = os.path.join(self.root, f"{index+1}.{level_title}")
+            level_path = os.path.join(self.root, f"{index + 1}.{level_title}")
            self.load_chapters(level_path, level_node)
            for index, chapter in enumerate(level_node["children"]):
                chapter_title = list(chapter.keys())[0]
                chapter_node = list(chapter.values())[0]
                chapter_path = os.path.join(
-                    level_path, f"{index+1}.{chapter_title}")
+                    level_path, f"{index + 1}.{chapter_title}")
                self.load_sections(chapter_path, chapter_node)
                for index, section_node in enumerate(chapter_node["children"]):
                    section_title = list(section_node.keys())[0]
                    full_path = os.path.join(
-                        chapter_path, f"{index}.{section_title}")
+                        chapter_path, f"{index + 1}.{section_title}")
                    if os.path.isdir(full_path):
+                        self.check_section_keywords(full_path)
                        self.ensure_exercises(full_path)

        tree_path = os.path.join(self.root, "tree.json")
        dump_json(tree_path, self.tree, exist_ok=True, override=True)
        return self.tree

+    def sort_dir_list(self, dirs):
+        """Return ``extract_node_env`` results for ``dirs``, ordered by first item.
+
+        Each entry of ``dirs`` is passed to ``self.extract_node_env``; the
+        results are returned as a new list sorted on element ``[0]``.
+        """
+        parsed = []
+        for entry in dirs:
+            parsed.append(self.extract_node_env(entry))
+        return sorted(parsed, key=lambda pair: pair[0])
+
    def load_levels(self, root_node):
        levels = []
        for level in os.listdir(self.root):
@@ -133,6 +173,8 @@ class TreeWalker:
                "node_id": config["node_id"],
                "keywords": config["keywords"],
                "children": [],
+                "keywords_must": config["keywords_must"],
+                "keywords_forbid": config["keywords_forbid"]
            }
        }

@@ -167,7 +209,7 @@ class TreeWalker:
        for index, [number, element] in enumerate(children):
            title = list(element.keys())[0]
            origin = os.path.join(base, f"{number}.{title}")
-            posted = os.path.join(base, f"{index+1}.{title}")
+            posted = os.path.join(base, f"{index + 1}.{title}")
            if origin != posted:
                self.logger.info(f"rename [{origin}] to [{posted}]")
            os.rename(origin, posted)
@@ -184,6 +226,8 @@ class TreeWalker:
                "tree_name": self.name,
                "keywords": [],
                "node_id": self.gen_node_id(),
+                "keywords_must": [],
+                "keywords_forbid": []
            }
            dump_json(config_path, config, exist_ok=True, override=True)
        else:
@@ -213,7 +257,9 @@ class TreeWalker:
        if not os.path.exists(config_path):
            config = {
                "node_id": self.gen_node_id(),
-                "keywords": []
+                "keywords": [],
+                "keywords_must": [],
+                "keywords_forbid": []
            }
            dump_json(config_path, config, exist_ok=True, override=True)
        else:
@@ -237,15 +283,25 @@ class TreeWalker:
            config = load_json(config_path)
            flag, result = self.ensure_node_id(config)
            if flag:
-                dump_json(config_path, config, exist_ok=True, override=True)
+                dump_json(config_path, result, exist_ok=True, override=True)
        return config

    def ensure_node_id(self, config):
-        if "node_id" not in config:
-            config["node_id"] = self.gen_node_id()
-            return True, config
-        else:
-            return False, config
+        flag = False
+        if "node_id" not in config or \
+                not config["node_id"].startswith(f"{self.name}-") or \
+                config["node_id"] in id_set:
+            new_id = self.gen_node_id()
+            id_set.add(new_id)
+            config["node_id"] = new_id
+            flag = True
+
+        for child in config.get("children", []):
+            child_node = list(child.values())[0]
+            f, _ = self.ensure_node_id(child_node)
+            flag = flag or f
+
+        return flag, config

    def gen_node_id(self):
        return f"{self.name}-{uuid.uuid4().hex}"
@@ -258,7 +314,8 @@ class TreeWalker:
            return int(number), title
        except Exception as error:
            self.logger.error(f"目录 [{path}] 解析失败，结构不合法，可能是缺少序号")
-            sys.exit(1)
+            # sys.exit(1)
+            raise error

    def load_chapter_node(self, full_name):
        config = self.ensure_chapter_config(full_name)
@@ -268,6 +325,8 @@ class TreeWalker:
                "node_id": config["node_id"],
                "keywords": config["keywords"],
                "children": [],
+                "keywords_must": config["keywords_must"],
+                "keywords_forbid": config["keywords_forbid"]
            }
        }
        return num, result
@@ -279,7 +338,9 @@ class TreeWalker:
            name: {
                "node_id": config["node_id"],
                "keywords": config["keywords"],
-                "children": config.get("children", [])
+                "children": config.get("children", []),
+                "keywords_must": config["keywords_must"],
+                "keywords_forbid": config["keywords_forbid"]
            }
        }
        # if "children" in config:
@@ -288,9 +349,77 @@ class TreeWalker:

    def ensure_exercises(self, section_path):
        config = self.ensure_section_config(section_path)
+        flag = False
+        for e in os.listdir(section_path):
+            base, ext = os.path.splitext(e)
+            _, source = os.path.split(e)
+            if ext != ".md":
+                continue
+            mfile = base + ".json"
+            meta_path = os.path.join(section_path, mfile)
+            md_file = os.path.join(section_path, e)
+            self.ensure_exercises_meta(meta_path, source, md_file)
+            export = config.get("export", [])
+            if mfile not in export and self.name != "algorithm":
+                export.append(mfile)
+                flag = True
+                config["export"] = export
+
+        if flag:
+            dump_json(os.path.join(section_path, "config.json"),
+                      config, True, True)
+
        for e in config.get("export", []):
            full_name = os.path.join(section_path, e)
            exercise = load_json(full_name)
-            if "exercise_id" not in exercise:
-                exercise["exercise_id"] = uuid.uuid4().hex
-                dump_json(full_name, exercise)
+            if "exercise_id" not in exercise or exercise.get("exercise_id") in id_set:
+                eid = uuid.uuid4().hex
+                exercise["exercise_id"] = eid
+                dump_json(full_name, exercise, True, True)
+            else:
+                id_set.add(exercise["exercise_id"])
+
+    def ensure_exercises_meta(self, meta_path, source, md_file):
+        """Create or complete the exercise metadata JSON at ``meta_path``.
+
+        When the file exists and is non-empty, its JSON content is loaded and
+        any missing key among ``exercise_id``, ``notebook_enable``,
+        ``source``, ``author`` and ``type`` is filled in. When the file is
+        missing or empty, a fresh metadata dict is built. In both cases the
+        result is written back with ``dump_json``.
+
+        Args:
+            meta_path: path of the metadata ``.json`` file.
+            source: value stored under ``"source"`` when that key is missing.
+            md_file: path handed to ``user_name`` to resolve ``"author"``.
+        """
+        meta = None
+        if os.path.exists(meta_path):
+            # dump_json writes UTF-8 with ensure_ascii=False, so read the file
+            # back as UTF-8 instead of relying on the platform default.
+            with open(meta_path, 'r', encoding="utf-8") as f:
+                content = f.read()
+            if content:
+                meta = json.loads(content)
+                if "exercise_id" not in meta:
+                    meta["exercise_id"] = uuid.uuid4().hex
+                if "notebook_enable" not in meta:
+                    meta["notebook_enable"] = self.default_notebook()
+                if "source" not in meta:
+                    meta["source"] = source
+                if "author" not in meta:
+                    # Resolved only when missing: user_name shells out to git.
+                    meta["author"] = user_name(md_file, self.authors)
+                if "type" not in meta:
+                    meta["type"] = "code_options"
+
+        if meta is None:
+            meta = {
+                "type": "code_options",
+                "author": user_name(md_file, self.authors),
+                "source": source,
+                "notebook_enable": self.default_notebook(),
+                "exercise_id": uuid.uuid4().hex
+            }
+        dump_json(meta_path, meta, True, True)
+
+    def default_notebook(self):
+        """Return the ``notebook_enable`` value to use for new exercise metadata.
+
+        An explicit ``self.enable_notebook`` (anything other than ``None``)
+        is returned as-is. Otherwise the result is ``True`` only when
+        ``self.name`` is one of ``"python"``, ``"java"`` or ``"c"``.
+        """
+        explicit = self.enable_notebook
+        if explicit is None:
+            return self.name in ["python", "java", "c"]
+        return explicit
+
+    def check_section_keywords(self, full_path):
+        """Exit the process when a section's config has no keywords.
+
+        Does nothing when ``self.ignore_keywords`` is truthy. Otherwise the
+        section config is obtained through ``ensure_section_config``; if its
+        ``"keywords"`` entry is missing or empty, an error is logged and the
+        process exits with status 1.
+        """
+        if not self.ignore_keywords:
+            section_config = self.ensure_section_config(full_path)
+            keywords = section_config.get("keywords", [])
+            if not keywords:
+                self.logger.error(f"节点 [{full_path}] 的关键字为空，请修改配置文件写入关键字")
+                sys.exit(1)