diff --git a/utils/htmlParser.py b/utils/htmlParser.py index c81e72eeae3a20753e25dd6b333dd2f42307ed19..e01abd17f751a5d45d272238c6e2de11674c7e38 100644 --- a/utils/htmlParser.py +++ b/utils/htmlParser.py @@ -118,7 +118,8 @@ class jsoup: if excludes and ret: ret = ret.clone() # 克隆一个,免得直接remove会影响doc的缓存 for exclude in excludes: - ret.remove(exclude) + # ret.remove(exclude) + ret(exclude).remove() else: nparse_rule, nparse_index, excludes = self.getParseInfo(nparse) if not ret: @@ -128,7 +129,8 @@ class jsoup: if excludes and ret: ret = ret.clone() # 克隆一个,免得直接remove会影响doc的缓存 for exclude in excludes: - ret.remove(exclude) + # ret.remove(exclude) + ret(exclude).remove() return ret def pdfa(self, html, parse: str):