提交 a4977579 编写于 作者: L luxin

add keywords_must and keywords_forbid

上级 1bc3732d
{
"node_id": "neo4j-0261ccb903994df281a2ec606b5d8c9e",
"keywords": [],
"children": [
{
"什么是图数据库": {
"keywords": [
"图数据库"
],
"children": [
{
"图论": {
"keywords": [
"节点",
"边",
"关系"
],
"children": [],
"keywords_must": [],
"keywords_forbid": [],
"node_id": "neo4j-3250e8d6f7fb417c83a8f007fc842a83"
}
},
{
"RDF": {
"keywords": [],
"children": [],
"keywords_must": [],
"keywords_forbid": [],
"node_id": "neo4j-80467f91b8454c029ac268ec80d934ec"
}
},
{
"属性图": {
"keywords": [],
"children": [],
"keywords_must": [],
"keywords_forbid": [],
"node_id": "neo4j-7e028f10298f4fd5ae055a8f05e375cb"
}
},
{
"原生图": {
"keywords": [],
"children": [],
"keywords_must": [],
"keywords_forbid": [],
"node_id": "neo4j-c462f5300d4740148067a2250766ef90"
}
}
],
"keywords_must": [],
"keywords_forbid": [],
"node_id": "neo4j-013ee17e238c4c51b3988ba109d1fc3d"
},
"什么时候需要图数据库": {
"keywords": [
"图数据库"
],
"children": [],
"keywords_must": [],
"keywords_forbid": []
},
"Neo4J图数据库概览": {
"keywords": [
"图数据库"
],
"children": [],
"keywords_must": [],
"keywords_forbid": []
}
}
],
"export": [
"helloworld.json"
],
"keywords_must": [],
"keywords_forbid": []
}
\ No newline at end of file
{
"node_id": "neo4j-0ee8cb8ccd6f4a59bc20f9ccbf7d627e",
"keywords": [],
"children": [],
"export": [],
"keywords_must": [],
"keywords_forbid": []
}
\ No newline at end of file
{
"node_id": "neo4j-a42252d5f8c24548bde127a385850a76",
"keywords": [],
"keywords_must": [],
"keywords_forbid": []
}
\ No newline at end of file
{
"node_id": "neo4j-5e171793d38e49e784f544a9f80d09cb",
"keywords": [],
"keywords_must": [],
"keywords_forbid": []
}
\ No newline at end of file
......@@ -2,5 +2,6 @@
"type": "code_options",
"author": "shiny",
"source": "databases.md",
"notebook_enable": false
"notebook_enable": false,
"exercise_id": "d22f8fb9ed0d4afebe55d5b62abb7808"
}
\ No newline at end of file
{
"node_id": "neo4j-9ec466f015f9422dab2b6b05f0581a8b",
"keywords": []
"keywords": [],
"keywords_must": [],
"keywords_forbid": []
}
\ No newline at end of file
{
"node_id": "neo4j-298b201de8044453a2d6e8d02e64962d",
"keywords": [],
"keywords_must": [],
"keywords_forbid": []
}
\ No newline at end of file
{
"node_id": "neo4j-b05c040c3bfe49e29f17397e9e16c7d4",
"keywords": [],
"keywords_must": [],
"keywords_forbid": []
}
\ No newline at end of file
{
"tree_name": "neo4j",
"keywords": ["图数据库", "Neo4j", "图数据平台", "图机器学习", "图算法", "NoSQL", "Graph", "Graph Database", "Graph Data Science", "GDS"],
"node_id": "neo4j-50ecfa9d2d0f4012ae80a3656c0756ab"
"node_id": "neo4j-50ecfa9d2d0f4012ae80a3656c0756ab",
"keywords_must": [],
"keywords_forbid": []
}
\ No newline at end of file
......@@ -17,43 +17,128 @@
"Neo4j简介": {
"node_id": "neo4j-0261ccb903994df281a2ec606b5d8c9e",
"keywords": [],
"children": []
"children": [
{
"什么是图数据库": {
"keywords": [
"图数据库"
],
"children": [
{
"图论": {
"keywords": [
"节点",
"边",
"关系"
],
"children": [],
"keywords_must": [],
"keywords_forbid": [],
"node_id": "neo4j-3250e8d6f7fb417c83a8f007fc842a83"
}
},
{
"RDF": {
"keywords": [],
"children": [],
"keywords_must": [],
"keywords_forbid": [],
"node_id": "neo4j-80467f91b8454c029ac268ec80d934ec"
}
},
{
"属性图": {
"keywords": [],
"children": [],
"keywords_must": [],
"keywords_forbid": [],
"node_id": "neo4j-7e028f10298f4fd5ae055a8f05e375cb"
}
},
{
"原生图": {
"keywords": [],
"children": [],
"keywords_must": [],
"keywords_forbid": [],
"node_id": "neo4j-c462f5300d4740148067a2250766ef90"
}
}
],
"keywords_must": [],
"keywords_forbid": [],
"node_id": "neo4j-013ee17e238c4c51b3988ba109d1fc3d"
},
"什么时候需要图数据库": {
"keywords": [
"图数据库"
],
"children": [],
"keywords_must": [],
"keywords_forbid": []
},
"Neo4J图数据库概览": {
"keywords": [
"图数据库"
],
"children": [],
"keywords_must": [],
"keywords_forbid": []
}
}
],
"keywords_must": [],
"keywords_forbid": []
}
},
{
"安装和启动": {
"node_id": "neo4j-0ee8cb8ccd6f4a59bc20f9ccbf7d627e",
"keywords": [],
"children": []
"children": [],
"keywords_must": [],
"keywords_forbid": []
}
}
]
],
"keywords_must": [],
"keywords_forbid": []
}
},
{
"Cypher查询语言": {
"node_id": "neo4j-9ec466f015f9422dab2b6b05f0581a8b",
"keywords": [],
"children": []
"children": [],
"keywords_must": [],
"keywords_forbid": []
}
}
]
],
"keywords_must": [],
"keywords_forbid": []
}
},
{
"Neo4j中阶": {
"node_id": "neo4j-298b201de8044453a2d6e8d02e64962d",
"keywords": [],
"children": []
"children": [],
"keywords_must": [],
"keywords_forbid": []
}
},
{
"Neo4j高阶": {
"node_id": "neo4j-b05c040c3bfe49e29f17397e9e16c7d4",
"keywords": [],
"children": []
"children": [],
"keywords_must": [],
"keywords_forbid": []
}
}
]
],
"keywords_must": [],
"keywords_forbid": []
}
}
\ No newline at end of file
# -*- coding: utf-8 -*-
import logging
from genericpath import exists
import json
import logging
import os
import uuid
import re
import subprocess
import sys
import uuid
import re
id_set = set()
......@@ -16,8 +16,29 @@ handler.setFormatter(formatter)
logger.addHandler(handler)
def search_author(author_dict, username):
    """Map a git user name to its canonical author key.

    ``author_dict`` maps a canonical author name to a container of alias
    names. The first key (in dict iteration order) whose aliases contain
    ``username`` is returned; when no key matches, ``username`` itself is
    returned unchanged.
    """
    matching_keys = (
        canonical
        for canonical, aliases in author_dict.items()
        if username in aliases
    )
    return next(matching_keys, username)
def user_name(md_file, author_dict):
    """Return the canonical author of ``md_file`` as recorded by ``git log``.

    Runs ``git log`` for the file, collects the first word after ``Author:``
    on every author line and resolves the last one through
    ``search_author``. ``git log`` prints the newest commit first by default,
    so the last author line belongs to the earliest commit that touched the
    file.

    Args:
        md_file: path of the markdown file to inspect.
        author_dict: canonical-name -> aliases mapping for ``search_author``.

    Raises:
        IndexError: when ``git log`` reports no author for the file (for
            example, the file has never been committed).
    """
    # subprocess.run waits for git to exit and closes the pipe, unlike a bare
    # Popen whose process and stdout handle were never cleaned up.
    # "--" keeps a file name starting with "-" from being parsed as an option.
    completed = subprocess.run(
        ["git", "log", "--", md_file],
        stdout=subprocess.PIPE,
    )
    nick_names = []
    for raw_line in completed.stdout.splitlines():
        # Commit messages are not guaranteed to be valid UTF-8; never let a
        # stray byte abort the author lookup.
        line = raw_line.decode("utf-8", errors="replace")
        if line.startswith('Author'):
            nick_names.append(line.split(' ')[1])
    if not nick_names:
        raise IndexError(
            f"no git author found for [{md_file}]; is the file committed?"
        )
    return search_author(author_dict, nick_names[-1])
def load_json(p):
    """Read the UTF-8 encoded JSON file at path ``p`` and return its parsed value."""
    # A single open is enough; the stacked second ``with open`` header only
    # re-opened the same file under the same name.
    with open(p, 'r', encoding="utf-8") as f:
        return json.load(f)
......@@ -30,7 +51,7 @@ def dump_json(p, j, exist_ok=False, override=False):
logger.error(f"{p} already exist")
sys.exit(0)
with open(p, 'w+', encoding='utf-8') as f:
with open(p, 'w+', encoding="utf8") as f:
f.write(json.dumps(j, indent=2, ensure_ascii=False))
......@@ -72,7 +93,18 @@ def check_export(base, cfg):
class TreeWalker:
def __init__(self, root, tree_name, title=None, log=None):
def __init__(
self, root,
tree_name,
title=None,
log=None,
authors=None,
enable_notebook=None,
ignore_keywords=False
):
self.ignore_keywords = ignore_keywords
self.authors = authors if authors else {}
self.enable_notebook = enable_notebook
self.name = tree_name
self.root = root
self.title = tree_name if title is None else title
......@@ -84,7 +116,9 @@ class TreeWalker:
root_node = {
"node_id": root["node_id"],
"keywords": root["keywords"],
"children": []
"children": [],
"keywords_must": root["keywords_must"],
"keywords_forbid": root["keywords_forbid"]
}
self.tree[root["tree_name"]] = root_node
self.load_levels(root_node)
......@@ -92,25 +126,31 @@ class TreeWalker:
for index, level in enumerate(root_node["children"]):
level_title = list(level.keys())[0]
level_node = list(level.values())[0]
level_path = os.path.join(self.root, f"{index+1}.{level_title}")
level_path = os.path.join(self.root, f"{index + 1}.{level_title}")
self.load_chapters(level_path, level_node)
for index, chapter in enumerate(level_node["children"]):
chapter_title = list(chapter.keys())[0]
chapter_node = list(chapter.values())[0]
chapter_path = os.path.join(
level_path, f"{index+1}.{chapter_title}")
level_path, f"{index + 1}.{chapter_title}")
self.load_sections(chapter_path, chapter_node)
for index, section_node in enumerate(chapter_node["children"]):
section_title = list(section_node.keys())[0]
full_path = os.path.join(
chapter_path, f"{index}.{section_title}")
chapter_path, f"{index + 1}.{section_title}")
if os.path.isdir(full_path):
self.check_section_keywords(full_path)
self.ensure_exercises(full_path)
tree_path = os.path.join(self.root, "tree.json")
dump_json(tree_path, self.tree, exist_ok=True, override=True)
return self.tree
def sort_dir_list(self, dirs):
    """Parse each entry of ``dirs`` with ``extract_node_env`` and sort the results.

    Returns a new list of the parsed items ordered by their first element;
    items with equal first elements keep their input order.
    """
    parsed_entries = map(self.extract_node_env, dirs)
    return sorted(parsed_entries, key=lambda entry: entry[0])
def load_levels(self, root_node):
levels = []
for level in os.listdir(self.root):
......@@ -133,6 +173,8 @@ class TreeWalker:
"node_id": config["node_id"],
"keywords": config["keywords"],
"children": [],
"keywords_must": config["keywords_must"],
"keywords_forbid": config["keywords_forbid"]
}
}
......@@ -167,7 +209,7 @@ class TreeWalker:
for index, [number, element] in enumerate(children):
title = list(element.keys())[0]
origin = os.path.join(base, f"{number}.{title}")
posted = os.path.join(base, f"{index+1}.{title}")
posted = os.path.join(base, f"{index + 1}.{title}")
if origin != posted:
self.logger.info(f"rename [{origin}] to [{posted}]")
os.rename(origin, posted)
......@@ -184,6 +226,8 @@ class TreeWalker:
"tree_name": self.name,
"keywords": [],
"node_id": self.gen_node_id(),
"keywords_must": [],
"keywords_forbid": []
}
dump_json(config_path, config, exist_ok=True, override=True)
else:
......@@ -213,7 +257,9 @@ class TreeWalker:
if not os.path.exists(config_path):
config = {
"node_id": self.gen_node_id(),
"keywords": []
"keywords": [],
"keywords_must": [],
"keywords_forbid": []
}
dump_json(config_path, config, exist_ok=True, override=True)
else:
......@@ -237,15 +283,25 @@ class TreeWalker:
config = load_json(config_path)
flag, result = self.ensure_node_id(config)
if flag:
dump_json(config_path, config, exist_ok=True, override=True)
dump_json(config_path, result, exist_ok=True, override=True)
return config
def ensure_node_id(self, config):
    """Give ``config`` and its nested children usable node ids.

    A node id is (re)generated when it is missing, does not start with
    ``"<self.name>-"``, or is already present in the module-level ``id_set``.
    Every freshly generated id is recorded in ``id_set``.

    Args:
        config: node configuration dict; it is modified in place.

    Returns:
        ``(changed, config)`` where ``changed`` is True when any id in this
        node or its visited children was (re)generated, and ``config`` is the
        same dict that was passed in.
    """
    # The earlier "missing id only" check returned on both branches, which
    # made the validation and the child recursion below unreachable.
    changed = False
    needs_new_id = (
        "node_id" not in config
        or not config["node_id"].startswith(f"{self.name}-")
        or config["node_id"] in id_set
    )
    if needs_new_id:
        new_id = self.gen_node_id()
        id_set.add(new_id)
        config["node_id"] = new_id
        changed = True

    for child in config.get("children", []):
        # Each child is a mapping; only its first value is visited.
        child_node = next(iter(child.values()))
        child_changed, _ = self.ensure_node_id(child_node)
        changed = changed or child_changed
    return changed, config
def gen_node_id(self):
    """Build a new node id of the form ``<self.name>-<32 hex chars>`` from a random UUID4."""
    random_hex = uuid.uuid4().hex
    return "{}-{}".format(self.name, random_hex)
......@@ -258,7 +314,8 @@ class TreeWalker:
return int(number), title
except Exception as error:
self.logger.error(f"目录 [{path}] 解析失败,结构不合法,可能是缺少序号")
sys.exit(1)
# sys.exit(1)
raise error
def load_chapter_node(self, full_name):
config = self.ensure_chapter_config(full_name)
......@@ -268,6 +325,8 @@ class TreeWalker:
"node_id": config["node_id"],
"keywords": config["keywords"],
"children": [],
"keywords_must": config["keywords_must"],
"keywords_forbid": config["keywords_forbid"]
}
}
return num, result
......@@ -279,7 +338,9 @@ class TreeWalker:
name: {
"node_id": config["node_id"],
"keywords": config["keywords"],
"children": config.get("children", [])
"children": config.get("children", []),
"keywords_must": config["keywords_must"],
"keywords_forbid": config["keywords_forbid"]
}
}
# if "children" in config:
......@@ -288,9 +349,77 @@ class TreeWalker:
def ensure_exercises(self, section_path):
    """Register the markdown exercises of a section and give each a unique id.

    For every ``*.md`` file in ``section_path`` the sibling ``*.json``
    metadata file is created or completed through ``ensure_exercises_meta``.
    Unless the tree is named ``"algorithm"``, that metadata file is also
    appended to the section config's ``export`` list, and the config is
    rewritten when the list changed. Finally every exported metadata file is
    checked: a missing ``exercise_id``, or one already present in the
    module-level ``id_set``, is replaced by a fresh UUID4 hex string and the
    file is rewritten.

    Args:
        section_path: directory of the section to process.
    """
    config = self.ensure_section_config(section_path)
    export_changed = False
    for entry in os.listdir(section_path):
        base, ext = os.path.splitext(entry)
        if ext != ".md":
            continue
        _, source = os.path.split(entry)
        mfile = base + ".json"
        meta_path = os.path.join(section_path, mfile)
        md_file = os.path.join(section_path, entry)
        self.ensure_exercises_meta(meta_path, source, md_file)
        export = config.get("export", [])
        if mfile not in export and self.name != "algorithm":
            export.append(mfile)
            config["export"] = export
            export_changed = True

    if export_changed:
        dump_json(
            os.path.join(section_path, "config.json"),
            config, exist_ok=True, override=True,
        )

    for exported in config.get("export", []):
        full_name = os.path.join(section_path, exported)
        exercise = load_json(full_name)
        # One combined check only. The earlier "missing id" branch wrote the
        # file with dump_json's default flags, i.e. without permission to
        # overwrite the metadata file that was just loaded.
        if "exercise_id" not in exercise or exercise.get("exercise_id") in id_set:
            eid = uuid.uuid4().hex
            exercise["exercise_id"] = eid
            # Record generated ids as well, as ensure_node_id does.
            id_set.add(eid)
            dump_json(full_name, exercise, exist_ok=True, override=True)
        else:
            id_set.add(exercise["exercise_id"])
def ensure_exercises_meta(self, meta_path, source, md_file):
    """Create or complete the JSON metadata file of one exercise.

    When ``meta_path`` exists and is not blank, its JSON content is loaded
    and any of the keys ``exercise_id``, ``notebook_enable``, ``source``,
    ``author`` and ``type`` that are missing are filled in. Otherwise a new
    metadata dict with all five keys is built. The result is always written
    back to ``meta_path``.

    Args:
        meta_path: path of the exercise's ``.json`` metadata file.
        source: value stored under ``"source"`` when that key is missing.
        md_file: path of the exercise markdown file, used to look up the
            author through ``user_name``.
    """
    meta = None
    if os.path.exists(meta_path):
        # dump_json writes UTF-8 with ensure_ascii=False, so read it back as
        # UTF-8 instead of relying on the platform default encoding.
        with open(meta_path, encoding="utf-8") as f:
            content = f.read()
        # A file holding only whitespace is treated like an empty one.
        if content.strip():
            meta = json.loads(content)
            # Factories are lazy so that git is only queried for the author
            # when the key is really missing; the order fixes the order in
            # which new keys are appended to the file.
            lazy_defaults = (
                ("exercise_id", lambda: uuid.uuid4().hex),
                ("notebook_enable", self.default_notebook),
                ("source", lambda: source),
                ("author", lambda: user_name(md_file, self.authors)),
                ("type", lambda: "code_options"),
            )
            for key, make_default in lazy_defaults:
                if key not in meta:
                    meta[key] = make_default()

    if meta is None:
        meta = {
            "type": "code_options",
            "author": user_name(md_file, self.authors),
            "source": source,
            "notebook_enable": self.default_notebook(),
            "exercise_id": uuid.uuid4().hex
        }
    dump_json(meta_path, meta, True, True)
def default_notebook(self):
    """Return the ``notebook_enable`` value to use for new exercise metadata.

    An explicit ``self.enable_notebook`` (anything other than ``None``) is
    returned as is; otherwise the result is True exactly when ``self.name``
    is one of ``"python"``, ``"java"`` or ``"c"``.
    """
    explicit_choice = self.enable_notebook
    if explicit_choice is None:
        return self.name in ("python", "java", "c")
    return explicit_choice
def check_section_keywords(self, full_path):
    """Abort the run when a section's config has no keywords.

    Does nothing when ``self.ignore_keywords`` is truthy. Otherwise the
    section config for ``full_path`` is obtained through
    ``ensure_section_config``; if its ``"keywords"`` entry is missing or
    empty, an error is logged and the process exits with status 1.
    """
    if not self.ignore_keywords:
        section_config = self.ensure_section_config(full_path)
        keywords = section_config.get("keywords", [])
        if not keywords:
            self.logger.error(f"节点 [{full_path}] 的关键字为空,请修改配置文件写入关键字")
            sys.exit(1)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册