...
 
Commits (3)
    https://gitcode.net/OpenDocCN/epub-crawler/-/commit/8428f07fda01db0a5dec0b3c5fb22148e1c2d8bb 2023-03-11 11:51:45 2023-03-11T11:51:45+08:00 wizardforcel 562826179@qq.com https://gitcode.net/OpenDocCN/epub-crawler/-/commit/411da0520f7ea0505fa017702d735c906008f331 2023-03-11 11:52:30 2023-03-11T11:52:30+08:00 wizardforcel 562826179@qq.com https://gitcode.net/OpenDocCN/epub-crawler/-/commit/1a9ebdba15be408413ff193135ce022caa6f8fba Merge branch 'master' of github.com:apachecn/epub-crawler 2023-03-11T11:52:48+08:00 wizardforcel 562826179@qq.com
......@@ -4,6 +4,7 @@
from urllib.parse import urljoin
import sys
import json
import yaml
import warnings
from pyquery import PyQuery as pq
import time
......@@ -182,8 +183,25 @@ def main():
if not path.exists(cfg_fname):
print('please provide config file')
return
user_cfg = json.loads(open(cfg_fname, encoding='utf-8').read())
ext = extname(cfg_fname).lower()
cont = open(cfg_fname, encoding='utf-8').read()
if ext == 'json':
user_cfg = json.loads(cont)
elif ext in ['yaml', 'yml']:
user_cfg = yaml.safe_load(cont)
elif ext == 'txt':
urls = [l.strip() for l in cont.split('\n')]
urls = [l for l in urls if l]
name = re.sub('\.\w+$', '', path.basename(cfg_fname))
user_cfg = {
'name': name,
'url': urls[0] if urls else '',
'list': urls,
}
else:
print('配置文件必须为 JSON、YAML 或 TXT')
return
update_config(cfg_fname, user_cfg)
if config['selenium']:
......
......@@ -140,4 +140,8 @@ def size_str_to_int(s):
base = float(m.group(1))
factor = factor_map[m.group(2)]
return int(base * factor)
\ No newline at end of file
return int(base * factor)
def extname(fname):
    """Return the extension of ``fname`` without its leading dot.

    The extension is the run of word characters that follows the last
    dot, and it must reach the end of the string. Letter case is kept
    as-is; callers that need a case-insensitive comparison must
    normalize the result themselves.

    Returns an empty string when ``fname`` does not end with a dot
    followed by at least one word character.
    """
    found = re.search(r'\.(\w+)$', fname)
    if found is None:
        return ''
    # Group 1 holds the characters after the dot, excluding the dot itself.
    return found.group(1)