diff --git a/base/rules.db b/base/rules.db index e671074eb342cab903708e871ce0a3f8e487b4b9..47af86169616a5dcea3631bbe6b2031d5bcc3545 100644 Binary files a/base/rules.db and b/base/rules.db differ diff --git a/js/version.txt b/js/version.txt index dcd32c18aed946855e9def9a85583d9b832eb558..00e897bdaeb820b6e16b1a74ed5b7cafe0e94992 100644 --- a/js/version.txt +++ b/js/version.txt @@ -1 +1 @@ -3.8.3 \ No newline at end of file +3.8.2 \ No newline at end of file diff --git a/utils/htmlParser.py b/utils/htmlParser.py index b1df7397a45d53634535671b2324916740a8cd35..23ac23e0f14f6715baff2652a4d55607c998d6fe 100644 --- a/utils/htmlParser.py +++ b/utils/htmlParser.py @@ -11,7 +11,7 @@ import re from jsonpath import jsonpath PARSE_CACHE = True # 解析缓存 -NOADD_INDEX = ':eq|:lt|:gt|^body$|^#' # 不自动加eq下标索引 +NOADD_INDEX = ':eq|:lt|:gt|:first|:last|^body$|^#' # 不自动加eq下标索引 URLJOIN_ATTR = '(url|src|href|-original|-src|-play|-url)$' # 需要自动urljoin的属性 @@ -134,6 +134,11 @@ class jsoup: else: new_parses.append(parse[i]) parse = ' '.join(new_parses) + else: + ps = parse.split(' ')[-1] # 如果带空格就取最后一个元素 + if not self.test(NOADD_INDEX, ps) and first: + parse = f'{parse}:eq(0)' + return parse def pdfa(self, html, parse: str):