提交 5d691e32 编写于 作者: ToTensor's avatar ToTensor

modify ignore

上级 60bea100
...@@ -10,6 +10,10 @@ test.md ...@@ -10,6 +10,10 @@ test.md
data_backup data_backup
test_dir test_dir
test.html test.html
src /src/*/
./src/*/
helper.py helper.py
main.py main.py
src
data/book_mapping.json
data/topic_mapping/1.Python编程无师自通.json
{
"Python编程无师自通": "c798a5992a654857867ec15660e1c32a",
"全程软件测试(第3版)": "825acb73c85c4c4bb9632afe858bc097",
"深入剖析Nginx": "608fd0c7025a4a34a97a29897b067d24",
"前端体验设计——HTML5+CSS3终极修炼": "c4eeb42b07f54b42a9fd1568b8ec4b98",
"零基础学机器学习": "b88b00f6ad14402ea66695d6809614da"
}
\ No newline at end of file
Title (one line describing the story)
Narrative:
As a [role]
I want [feature]
So that [benefit]
Acceptance criteria: (presented as Scenarios)
Scenario 1: Title
Given [context]
And [some more context]...
When [event]
Then [outcome]
And [another outcome]...
Scenario 2: ...
\ No newline at end of file
/**
 * Opens Bing in Firefox, searches for "软件测试" and prints the page title
 * before and after the search.
 *
 * @param args unused command-line arguments
 */
public static void main(String[] args) {
    // Create a Firefox WebDriver instance and point the browser at Bing.
    WebDriver driver = new FirefoxDriver();
    try {
        driver.get("http://cn.bing.com");
        // Print the title of the home page.
        System.out.println("Home Page title: " + driver.getTitle());
        // Locate the search input by its id.
        WebElement element = driver.findElement(By.id("sb_form_q"));
        // Alternatively, locate the element with XPath:
        // element = driver.findElement(By.xpath("*[@id='sb_form_q']"))
        // Type the query into the field and submit the form.
        element.sendKeys("软件测试");
        element.submit();
        // Wait (at most 10 seconds) for the result page by polling its title.
        (new WebDriverWait(driver, 10)).until(new ExpectedCondition<Boolean>() {
            public Boolean apply(WebDriver d) {
                return d.getTitle().toLowerCase().endsWith("软件测试");
            }
        });
        System.out.println("Result Page title: " + driver.getTitle());
    } finally {
        // Always close the browser, even when a lookup or the wait fails.
        driver.quit();
    }
}
\ No newline at end of file
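<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>MySel20Proj</groupId>
    <artifactId>MySel20Proj</artifactId>
    <version>1.0</version>
    <dependencies>
        <dependency>
            <groupId>org.seleniumhq.selenium</groupId>
            <artifactId>selenium-java</artifactId>
            <version>2.53.0</version>
        </dependency>
        <dependency>
            <groupId>org.seleniumhq.selenium</groupId>
            <artifactId>htmlunit-driver</artifactId>
            <version>2.20</version>
        </dependency>
    </dependencies>
</project>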
java -jar selenium-server-standalone-x.xx.x.jar -role hub 
java -jar selenium-server-standalone-x.xx.x.jar -role node -port 5551 
java -jar selenium-server-standalone-x.xx.x.jar -role node -port 5552 
……
\ No newline at end of file
public class EditorTest extends
ActivityInstrumentationTestCase2 {
private Solo solo;
public EditorTest(){
super(EditorActivity.class);
}
public void setUp() throws Exception {
solo = new Solo(getInstrumentation(),getActivity());
}
public void testPreferenceIsSaved() throws Exception {
solo.sendKey(Solo.MENU);
solo.clickOnText("More");
solo.clickOnText("Preferences");
solo.clickOnText("Edit File Extensions")
solo.clickOnText(solo.searchText("rtf"));
solo.clickOnText("txt");
solo.clearEditText(2);
solo.enterText(2,"robotium");
solo.clickOnButton("Save");
solo.goBack();
solo.clickOnText("Edit File Extensions");
Assert.assertTrue(solo.searchText("application/robotium"));
}
@Override
public void tearDown() throws Exception {
solo.finishOpenedActivities();
}
\ No newline at end of file
Feature: Refund item
Scenario: Jeff returns a faulty microwave
Given Jeff has bought a microwave for $100
And he has a receipt
When he returns the microwave
Then Jeff should be refunded $100
\ No newline at end of file
import os
import json
import html
import requests
import logging
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
def _list_subdirs(parent):
    """Return the paths of the immediate sub-directories of *parent*."""
    candidates = (os.path.join(parent, name) for name in os.listdir(parent))
    return [path for path in candidates if os.path.isdir(path)]


def modify_dir_name(data_dir='data'):
    """Rename third-level directories from ``"<a>.<b> <title>"`` to ``"<b>.<title>"``.

    The directories directly under *data_dir* are scanned, then their
    sub-directories, then the sub-directories of those; only this third level
    is renamed.

    Args:
        data_dir: root directory to scan (defaults to ``'data'``).

    Every processed path is printed. A directory whose name does not have the
    expected shape, or that cannot be renamed, is printed and left untouched.
    """
    book_dirs = _list_subdirs(data_dir)
    chapter_dirs = [path for book in book_dirs for path in _list_subdirs(book)]
    exercise_dirs = [path for chapter in chapter_dirs
                     for path in _list_subdirs(chapter)]

    for exercise_dir in exercise_dirs:
        parent_dir, dir_name = os.path.split(exercise_dir)
        try:
            parts = dir_name.split(' ')
            title = parts[1]
            # "1.2" -> "2": keep only the number after the first dot.
            number = parts[0].split('.')[1]
        except IndexError:
            # Name is not of the form "<a>.<b> <title>"; report and skip it.
            print(exercise_dir)
            continue

        final_name = os.path.join(parent_dir, f"{number}.{title}")
        print(exercise_dir)
        print(final_name)
        try:
            os.rename(exercise_dir, final_name)
        except OSError:
            print(exercise_dir)
def get_files_path(file_dir, filetype='.txt'):
    """Collect the paths of all files below *file_dir* with a given extension.

    Args:
        file_dir: directory that is walked recursively.
        filetype: extension to keep, including the leading dot; ``None``
            keeps every file.

    Returns:
        A list of paths built by joining each walked directory with the file
        name, so they are absolute only when *file_dir* itself is absolute.
    """
    files_path = []
    for root, _dirs, files in os.walk(file_dir):
        files_path.extend(
            os.path.join(root, name)
            for name in files
            if filetype is None or os.path.splitext(name)[1] == filetype
        )
    return files_path
def post(url, params, retry=3, headers=None):
    """POST *params* to *url*, retrying on failure.

    Args:
        url: target URL.
        params: payload. It is serialised to JSON when *headers* is ``None``;
            otherwise it is sent as given.
        retry: maximum number of attempts.
        headers: optional request headers. When omitted, a JSON
            ``Content-Type`` header is used.

    Returns:
        The decoded JSON body of the first successful response, or ``None``
        when every attempt ended with an unsuccessful response.

    Raises:
        Exception: whatever the last attempt raised, once *retry* attempts
            have been used up.
    """
    if headers is None:
        hdrs = {"Content-Type": "application/json"}
        data = json.dumps(params)
    else:
        hdrs = headers
        data = params

    fails = 0
    while fails < retry:
        try:
            logger.debug(f"will post {data} to {url}")
            resp = requests.post(url, data, headers=hdrs, timeout=10)
            if resp:
                logger.info(f"resp {resp.content}")
                return resp.json()
            logger.error(f"resp: [{resp}]")
            fails += 1
        except Exception as error:
            logger.error(f"post {params} to {url} failed {error}")
            fails += 1
            # The original compared with ">" and therefore never re-raised,
            # because the loop stops as soon as fails == retry.
            if fails >= retry:
                raise
    return None
def send_topic():
    """Update the community topic of every ``.py`` file of the Python book.

    Reads the file-path -> topic-link mapping, and for each ``.py`` file under
    the book directory that has a mapping entry posts an update request whose
    ``id`` is the last path segment of the mapped link. Files without an entry
    only print ``错误``. The mapping is written back to disk at the end.
    """
    web_url = "https://codechina.csdn.net/csdn/content/book_code_c798a5992a654857867ec15660e1c32a/-/blob/master/"
    request_url = 'http://ccloud.internal.csdn.net/v1/internal/community/content/sendTopic'
    book_dir = 'data/books/1.Python编程无师自通'
    mapping_path = 'data/topic_mapping/1.Python编程无师自通.json'

    files = get_files_path(book_dir, '.py')
    with open(mapping_path, 'r', encoding='utf-8') as f:
        chapter_code_mapping = json.load(f)

    for file in files:
        topic_link = chapter_code_mapping.get(file)
        if topic_link is None:
            print('错误')
            continue

        # Strip the real book directory; the original stripped a prefix
        # without the leading "1." and therefore never matched.
        topic_title = file.replace(book_dir + '/', '')
        topic_title = topic_title.replace('/', '|')
        topic_title = topic_title.replace(' ', '.')
        topic_content = "代码:<a href=\"{}\">{}</a>".format(
            web_url + file, topic_title)
        send_topic_request_param = {
            "type": "long_text",
            "cateId": 20890,
            "content": topic_content,
            "topicTitle": topic_title,
            "mdContent": topic_content,
            "communityId": 3597,
            "loginUserName": "community_286",
            "bizNo": "ebook",
            # The topic id is the last path segment of the stored link.
            "id": int(topic_link.split('/')[-1]),
        }
        post(request_url, send_topic_request_param)
        print('{}:{}'.format(file, topic_link))

    save_mapping = json.dumps(chapter_code_mapping,
                              ensure_ascii=False,
                              indent=2)
    print(save_mapping)
    with open(mapping_path, 'w', encoding='utf-8') as f:
        f.write(save_mapping)
# NOTE: module-level call -- send_topic() runs whenever this file is executed or imported.
send_topic()
\ No newline at end of file
from src.ebook.extract_book_code import extract_code
# Run the extraction only when this file is executed as a script.
if __name__ == "__main__":
    extract_code()
import json
import requests
import logging
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
def get_chapter_content(params):
    """Fetch chapter data from the chapter-content endpoint.

    Args:
        params: query parameters forwarded to the GET request.

    Returns:
        The ``data`` member of the JSON response when the status code is 200,
        otherwise an empty dict.
    """
    url = 'http://192.168.50.117:9003/v1/chapter/content'
    # NOTE(review): hard-coded session cookie -- consider loading it from
    # configuration or the environment instead of committing it.
    headers = {
        "Cookie": "UserToken=149ba8a7a8d341bbbe41f904c4c9b176;UserName=xiuxiuyayayy"
    }
    # Without a timeout a stalled server would block this call forever.
    result = requests.get(url=url, params=params, headers=headers, timeout=10)
    if result.status_code != 200:
        logger.error('request failed!!!!!')
        return {}
    logger.info('request success')
    return json.loads(result.text)['data']
def get_chapter_list(params):
    """Fetch chapter-list data from the chapter-list endpoint.

    Args:
        params: query parameters forwarded to the GET request.

    Returns:
        The ``data`` member of the JSON response when the status code is 200,
        otherwise an empty dict.
    """
    url = 'http://192.168.50.117:9003/inner/v1/chapter/list'
    # NOTE(review): hard-coded session cookie -- consider loading it from
    # configuration or the environment instead of committing it.
    headers = {
        "Cookie": "UserToken=149ba8a7a8d341bbbe41f904c4c9b176;UserName=xiuxiuyayayy"
    }
    # Without a timeout a stalled server would block this call forever.
    result = requests.get(url=url, params=params, headers=headers, timeout=10)
    if result.status_code != 200:
        logger.error('request failed!!!!!')
        return {}
    logger.info('request success')
    return json.loads(result.text)['data']
\ No newline at end of file
import json
import os
import re
import html
from bs4 import BeautifulSoup
from .get_book_chapter_id_list import get_chapter_id_list
from .ebook_get_request import get_chapter_content
def _pad_chapter_number(chapter_name, unit):
    """Zero-pad the text between '第' and *unit* to two characters.

    Returns *chapter_name* unchanged when it has no such marker.
    """
    pattern = r'第(.*){}'.format(unit)
    found = re.findall(pattern, chapter_name)
    if not found:
        return chapter_name
    padded = '第{}{}'.format(found[0].zfill(2), unit)
    # A callable replacement avoids backslash handling in the template.
    return re.sub(pattern, lambda _match: padded, chapter_name)


def _save_section_code(chapter_dir, chapter_content):
    """Create one directory per ``<h2>`` section and save its code snippets."""
    # NOTE(review): the title pattern is greedy and single-line while the
    # split pattern is lazy and DOTALL; if they ever match a different number
    # of headings, the section lookup below raises IndexError.
    section_list = re.findall(r'<h2.*>(.*?)</h2>', chapter_content)
    section_content_list = re.split(r'<h2.*?>.*?</h2>',
                                    chapter_content,
                                    flags=re.S)
    section_dir_list = []
    for section in section_list:
        # Normalise non-breaking spaces and drop '/' so that the title is a
        # single path component.
        section = section.replace('\xa0', ' ').replace('/', '')
        section_dir = os.path.join(chapter_dir, section)
        print(section_dir)
        os.makedirs(section_dir, exist_ok=True)
        section_dir_list.append(section_dir)

    for idx, section_content in enumerate(section_content_list):
        # Index 0 is the text before the first heading; it belongs to the
        # chapter directory itself.
        target_dir = chapter_dir if idx == 0 else section_dir_list[idx - 1]
        code_list = re.findall(r'<code>(.*?)</code>', section_content, re.S)
        count = 0
        for snippet in code_list:
            # Single-line <code> fragments are inline code, not listings.
            if len(snippet.split('\n')) < 2:
                continue
            count += 1
            soup = BeautifulSoup(html.unescape(snippet), 'html.parser')
            res_str = soup.get_text()
            # NOTE(review): every listing before the first heading is written
            # to code_0.java, so later ones overwrite earlier ones.
            if idx == 0:
                file_name = 'code_0.java'
            else:
                file_name = 'code_{}.java'.format(count)
            print(res_str)
            with open(os.path.join(target_dir, file_name), 'w',
                      encoding='utf-8') as f:
                f.write(res_str)


def extract_code():
    """Download every mapped book and lay its chapters out on disk.

    For each entry of ``data/book_mapping.json`` (except
    ``Python编程无师自通``) a directory ``data/books/<index>.<book name>`` is
    created with one sub-directory per chapter. For ``全程软件测试(第3版)`` the
    chapter HTML is additionally split at ``<h2>`` headings and every
    multi-line ``<code>`` block is saved as a ``.java`` file.
    """
    book_mapping_path = "data/book_mapping.json"
    with open(book_mapping_path, "r", encoding="utf-8") as f:
        book_mapping = json.load(f)

    for book_idx, (book_name, book_id) in enumerate(book_mapping.items()):
        if book_name == 'Python编程无师自通':
            continue
        book_dir_name = '{}.{}'.format(book_idx + 1, book_name)
        book_dir = os.path.join('data/books', book_dir_name)
        os.makedirs(book_dir, exist_ok=True)
        print(book_dir_name)

        chapter_id_list = get_chapter_id_list(
            {"bookId": book_id, "is_main": 1})
        print(chapter_id_list)

        # This book numbers its chapters with '课' instead of '章'.
        unit = '课' if book_name == "零基础学机器学习" else '章'
        for chapter_id in chapter_id_list:
            chapter_resp = get_chapter_content({
                'bookId': book_id,
                'chapterId': chapter_id
            })
            if not chapter_resp:
                # The request helper returns an empty dict on failure.
                print('skip chapter {}: empty response'.format(chapter_id))
                continue

            chapter_name = _pad_chapter_number(chapter_resp['name'], unit)
            chapter_dir = os.path.join(book_dir, chapter_name)
            os.makedirs(chapter_dir, exist_ok=True)

            chapter_content = html.unescape(chapter_resp['content'])
            if book_name == "全程软件测试(第3版)":
                _save_section_code(chapter_dir, chapter_content)
\ No newline at end of file
import json
import re
import html
import nltk
import html2text
import os
import pandas as pd
from bs4 import BeautifulSoup
from .ebook_get_request import get_chapter_list
def get_chapter_id_list(param):
    """Return the ``chapterid`` of every item yielded by ``get_chapter_list(param)``."""
    return [entry['chapterid'] for entry in get_chapter_list(param)]
import json
import re
import html
import nltk
import html2text
import os
import pandas as pd
from bs4 import BeautifulSoup
from src.extract.get_book_content import get_chapter_content
def _clean_listing(code_text):
    """Strip leading line numbers and cut a listing at the first ``>>`` line."""
    cleaned = []
    for line in code_text.split('\n'):
        if line == '':
            continue
        if line[0].isdigit():
            # Drop the listing's line number and up to two following spaces.
            line = re.sub(r'^[0-9]+ {0,2}', '', line)
        elif line.startswith('>>'):
            # Everything from the first line starting with '>>' on is dropped.
            break
        cleaned.append(line)
    return '\n'.join(cleaned)


def extract_structure():
    """Save the code listings of each numbered chapter under ``./test_dir``.

    Chapter ids are read from the ``chapterid`` column of the book CSV. A
    chapter whose name has no ``第…章`` marker is skipped. Each remaining
    chapter gets a directory with one sub-directory per ``<h2>`` section, and
    every multi-line ``<code>`` block is cleaned and written as a ``.py`` file.
    """
    params = {
        "bookId": "c798a5992a654857867ec15660e1c32a",
    }
    book_path = '20211203Python编程无师自通.csv'
    book_data = pd.read_csv(book_path)

    for chapter_id in book_data['chapterid']:
        params['chapterId'] = chapter_id
        res = get_chapter_content(params)
        if not res:
            # The request helper returns an empty dict on failure.
            print('skip chapter {}: empty response'.format(chapter_id))
            continue

        chapter_dir = './test_dir/{}'.format(res['name'])
        found = re.findall(r'第(.*)章', chapter_dir)
        if not found:
            continue
        chapter_num = found[0].zfill(2)
        print(chapter_num)
        chapter_dir = re.sub(r'第(.*)章',
                             lambda _match: '第{}章'.format(chapter_num),
                             chapter_dir)
        print(chapter_dir)
        os.makedirs(chapter_dir, exist_ok=True)

        content = html.unescape(res['content'])
        # NOTE(review): the title pattern is greedy and single-line while the
        # split pattern is lazy and DOTALL; if they ever match a different
        # number of headings, the section lookup below raises IndexError.
        section_list = re.findall(r'<h2.*>(.*?)</h2>', content)
        section_content_list = re.split(r'<h2.*?>.*?</h2>', content, flags=re.S)

        section_dir_list = []
        for section in section_list:
            # '/' would be read as a path separator; drop it from the title.
            section_dir = os.path.join(chapter_dir, section.replace('/', ''))
            os.makedirs(section_dir, exist_ok=True)
            section_dir_list.append(section_dir)

        for idx, section_content in enumerate(section_content_list):
            # Index 0 is the text before the first heading.
            target_dir = chapter_dir if idx == 0 else section_dir_list[idx - 1]
            code_list = re.findall(r'<code>(.*?)</code>', section_content, re.S)
            count = 0
            for snippet in code_list:
                # Single-line <code> fragments are inline code, not listings.
                if len(snippet.split('\n')) < 2:
                    continue
                count += 1
                soup = BeautifulSoup(html.unescape(snippet), 'html.parser')
                clean_code = _clean_listing(soup.get_text())
                if idx == 0:
                    file_name = 'code_0.py'
                else:
                    file_name = 'code_{}.py'.format(count)
                with open(os.path.join(target_dir, file_name), 'w',
                          encoding='utf-8') as f:
                    f.write(clean_code)
import json
import requests
import logging
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
def get_chapter_content(params):
    """Fetch chapter data from the chapter-content endpoint.

    Args:
        params: query parameters forwarded to the GET request.

    Returns:
        The ``data`` member of the JSON response when the status code is 200,
        otherwise an empty dict.
    """
    url = 'http://192.168.50.117:9003/v1/chapter/content'
    # NOTE(review): hard-coded session cookie -- consider loading it from
    # configuration or the environment instead of committing it.
    headers = {
        "Cookie": "UserToken=149ba8a7a8d341bbbe41f904c4c9b176;UserName=xiuxiuyayayy"
    }
    # Without a timeout a stalled server would block this call forever.
    result = requests.get(url=url, params=params, headers=headers, timeout=10)
    if result.status_code != 200:
        logger.error('request failed!!!!!')
        return {}
    logger.info('request success')
    return json.loads(result.text)['data']
\ No newline at end of file
# -*- coding: utf-8 -*-
import logging
from genericpath import exists
import json
import os
import uuid
import sys
import re
# NOTE(review): id_set is not referenced by any code visible in this file.
id_set = set()
# Module logger: INFO and above, written to stdout as
# "<time> - <level> - <message>".
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
def load_json(p):
    """Read the UTF-8 encoded JSON file at path *p* and return the decoded value."""
    with open(p, 'r', encoding='utf-8') as f:
        return json.load(f)
def dump_json(p, j, exist_ok=False, override=False):
    """Write *j* as pretty-printed UTF-8 JSON to path *p*.

    Args:
        p: destination file path.
        j: JSON-serialisable value.
        exist_ok: when ``False`` and *p* already exists, log an error and
            terminate the process.
        override: when *p* exists and *exist_ok* is true, overwrite only if
            this is true; otherwise leave the file untouched.

    Raises:
        SystemExit: *p* exists and *exist_ok* is false.
    """
    if os.path.exists(p):
        if not exist_ok:
            logger.error(f"{p} already exist")
            # This is a failure, so report a non-zero exit status.
            sys.exit(1)
        if not override:
            return
    with open(p, 'w', encoding='utf-8') as f:
        json.dump(j, f, indent=2, ensure_ascii=False)
def ensure_config(path):
    """Return the ``config.json`` of directory *path*, creating a default one if missing.

    A missing file is created with an empty ``keywords`` list and that default
    is returned; an existing file is loaded and returned as is.
    """
    config_path = os.path.join(path, "config.json")
    if os.path.exists(config_path):
        return load_json(config_path)
    default_node = {"keywords": []}
    dump_json(config_path, default_node, exist_ok=True, override=False)
    return default_node
def parse_no_name(d):
    """Split a ``"<number>.<name>"`` string into its number and name.

    Args:
        d: text to parse; the first ``<digits>.`` occurrence is used.

    Returns:
        A ``(number, name)`` tuple with the number as ``int`` and the name as
        the text after the dot.

    Raises:
        SystemExit: *d* contains no ``<digits>.`` sequence.
    """
    m = re.search(r'(\d+)\.(.*)', d)
    if m is None:
        # Report the problem and exit with a failure status instead of
        # leaving silently with status 0.
        logger.error(f"[{d}] does not match '<number>.<name>'")
        sys.exit(1)
    return int(m.group(1)), m.group(2)
def check_export(base, cfg):
    """Drop entries of ``cfg['export']`` that do not exist under *base*.

    Args:
        base: directory the export entries are relative to.
        cfg: configuration dict; its ``export`` list is replaced in place
            only when at least one entry is missing on disk.

    Returns:
        ``True`` when at least one entry was missing, otherwise ``False``.
    """
    declared = cfg.get('export', [])
    existing = [name for name in declared
                if os.path.exists(os.path.join(base, name))]
    missing = len(existing) != len(declared)
    if missing:
        cfg["export"] = existing
    return missing
class TreeWalker:
    """Build a tree description from a hierarchy of numbered directories.

    Directories are named ``<number>.<title>``. Each one carries a
    ``config.json`` that is created on demand. ``walk`` scans the directories
    under the root (levels), their sub-directories (chapters) and the
    sub-directories of those (sections), renumbers siblings contiguously from
    1, and writes the result to ``tree.json`` in the root.
    """

    def __init__(self, root, tree_name, title=None, log=None):
        """Store the walker settings.

        Args:
            root: path of the tree's root directory.
            tree_name: tree name; also the prefix of generated node ids.
            title: display title; defaults to *tree_name*.
            log: logger to use; defaults to the module logger.
        """
        self.name = tree_name
        self.root = root
        self.title = tree_name if title is None else title
        self.tree = {}
        self.logger = logger if log is None else log

    def walk(self):
        """Scan the directory hierarchy, write ``tree.json`` and return the tree."""
        root = self.load_root()
        root_node = {
            "node_id": root["node_id"],
            "keywords": root["keywords"],
            "children": []
        }
        self.tree[root["tree_name"]] = root_node
        self.load_levels(root_node)
        self.load_chapters(self.root, root_node)
        for level_index, level in enumerate(root_node["children"]):
            level_title, level_node = next(iter(level.items()))
            level_path = os.path.join(
                self.root, f"{level_index+1}.{level_title}")
            self.load_chapters(level_path, level_node)
            for chapter_index, chapter in enumerate(level_node["children"]):
                chapter_title, chapter_node = next(iter(chapter.items()))
                chapter_path = os.path.join(
                    level_path, f"{chapter_index+1}.{chapter_title}")
                self.load_sections(chapter_path, chapter_node)
                for section_index, section_node in enumerate(
                        chapter_node["children"]):
                    section_title = next(iter(section_node))
                    # Directories are numbered from 1 by resort_children, so
                    # the path must use index + 1 like the levels above.
                    full_path = os.path.join(
                        chapter_path, f"{section_index+1}.{section_title}")
                    if os.path.isdir(full_path):
                        self.ensure_exercises(full_path)
        tree_path = os.path.join(self.root, "tree.json")
        dump_json(tree_path, self.tree, exist_ok=True, override=True)
        return self.tree

    def load_levels(self, root_node):
        """Fill ``root_node['children']`` with the level directories of the root."""
        levels = []
        for level in os.listdir(self.root):
            level_path = os.path.join(self.root, level)
            # Test the joined path; the bare name would be resolved against
            # the current working directory.
            if not os.path.isdir(level_path):
                continue
            num, config = self.load_level_node(level_path)
            levels.append((num, config))
        levels = self.resort_children(self.root, levels)
        root_node["children"] = [item[1] for item in levels]
        return root_node

    def load_level_node(self, level_path):
        """Return ``(number, {title: node})`` for the level at *level_path*."""
        config = self.ensure_level_config(level_path)
        return self._build_node(level_path, config, [])

    def load_chapters(self, base, level_node):
        """Fill ``level_node['children']`` with the sub-directories of *base*."""
        chapters = []
        for name in os.listdir(base):
            full_name = os.path.join(base, name)
            if os.path.isdir(full_name):
                num, chapter = self.load_chapter_node(full_name)
                chapters.append((num, chapter))
        chapters = self.resort_children(base, chapters)
        level_node["children"] = [item[1] for item in chapters]
        return level_node

    def load_sections(self, base, chapter_node):
        """Fill ``chapter_node['children']`` with the sub-directories of *base*."""
        sections = []
        for name in os.listdir(base):
            full_name = os.path.join(base, name)
            if os.path.isdir(full_name):
                num, section = self.load_section_node(full_name)
                sections.append((num, section))
        sections = self.resort_children(base, sections)
        chapter_node["children"] = [item[1] for item in sections]
        return chapter_node

    def resort_children(self, base, children):
        """Sort *children* by number and rename their directories to 1..n.

        Args:
            base: directory that contains the children.
            children: list of ``(number, {title: node})`` pairs; sorted in place.

        Returns:
            The same list, sorted. The numbers in the pairs are the original
            ones, not the new positions.
        """
        children.sort(key=lambda item: item[0])
        for index, (number, element) in enumerate(children):
            title = next(iter(element))
            origin = os.path.join(base, f"{number}.{title}")
            posted = os.path.join(base, f"{index+1}.{title}")
            if origin != posted:
                self.logger.info(f"rename [{origin}] to [{posted}]")
                os.rename(origin, posted)
        return children

    def ensure_chapters(self):
        """Make sure every directory directly under the root has a config."""
        for subdir in os.listdir(self.root):
            level_path = os.path.join(self.root, subdir)
            # Join with the root and skip plain files such as config.json.
            if os.path.isdir(level_path):
                self.ensure_level_config(level_path)

    def load_root(self):
        """Load the root ``config.json``, creating it or adding a node id if needed."""
        config_path = os.path.join(self.root, "config.json")
        if not os.path.exists(config_path):
            config = {
                "tree_name": self.name,
                "keywords": [],
                "node_id": self.gen_node_id(),
            }
            dump_json(config_path, config, exist_ok=True, override=True)
        else:
            config = load_json(config_path)
            flag, result = self.ensure_node_id(config)
            if flag:
                dump_json(config_path, result, exist_ok=True, override=True)
        return config

    def _ensure_config(self, path, defaults):
        """Load ``config.json`` in *path*; create it from *defaults* if missing.

        A new config gets a fresh ``node_id`` followed by *defaults*. An
        existing config that lacks ``node_id`` gets one and is saved again.
        """
        config_path = os.path.join(path, "config.json")
        if not os.path.exists(config_path):
            config = {"node_id": self.gen_node_id()}
            config.update(defaults)
            dump_json(config_path, config, exist_ok=True, override=True)
            return config
        config = load_json(config_path)
        changed, config = self.ensure_node_id(config)
        if changed:
            dump_json(config_path, config, exist_ok=True, override=True)
        return config

    def ensure_level_config(self, path):
        """Return the config of a level directory, creating it if needed."""
        # "keywords" is included because load_level_node reads it.
        return self._ensure_config(path, {"keywords": []})

    def ensure_chapter_config(self, path):
        """Return the config of a chapter directory, creating it if needed."""
        return self._ensure_config(path, {"keywords": []})

    def ensure_section_config(self, path):
        """Return the config of a section directory, creating it if needed."""
        return self._ensure_config(
            path, {"keywords": [], "children": [], "export": []})

    def ensure_node_id(self, config):
        """Add a ``node_id`` to *config* when absent.

        Returns:
            ``(True, config)`` when an id was added, else ``(False, config)``.
        """
        if "node_id" in config:
            return False, config
        config["node_id"] = self.gen_node_id()
        return True, config

    def gen_node_id(self):
        """Return a new id of the form ``<tree name>-<32 hex characters>``."""
        return f"{self.name}-{uuid.uuid4().hex}"

    def extract_node_env(self, path):
        """Split the last component of *path* into ``(number, title)``.

        The component must look like ``<number>.<title>``; otherwise an error
        is logged and the process exits with status 1.
        """
        _, dir_name = os.path.split(path)
        self.logger.info(path)
        try:
            number, title = dir_name.split(".", 1)
            return int(number), title
        except ValueError:
            # Raised both when there is no dot and when the prefix is not
            # an integer.
            self.logger.error(f"目录 [{path}] 解析失败,结构不合法,可能是缺少序号")
            sys.exit(1)

    def _build_node(self, full_name, config, children):
        """Return ``(number, {title: node})`` for the directory *full_name*."""
        num, name = self.extract_node_env(full_name)
        node = {
            "node_id": config["node_id"],
            # Configs written without "keywords" are treated as having none.
            "keywords": config.get("keywords", []),
            "children": children,
        }
        return num, {name: node}

    def load_chapter_node(self, full_name):
        """Return ``(number, {title: node})`` for the chapter at *full_name*."""
        config = self.ensure_chapter_config(full_name)
        return self._build_node(full_name, config, [])

    def load_section_node(self, full_name):
        """Return ``(number, {title: node})`` for the section at *full_name*.

        The node's children are taken from the section config.
        """
        config = self.ensure_section_config(full_name)
        return self._build_node(
            full_name, config, config.get("children", []))

    def ensure_exercises(self, section_path):
        """Give every exported exercise file of a section an ``exercise_id``."""
        config = self.ensure_section_config(section_path)
        for e in config.get("export", []):
            full_name = os.path.join(section_path, e)
            exercise = load_json(full_name)
            if "exercise_id" not in exercise:
                exercise["exercise_id"] = uuid.uuid4().hex
                # The file was just read, so it exists: overwriting must be
                # allowed explicitly or dump_json terminates the process.
                dump_json(full_name, exercise, exist_ok=True, override=True)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册