修复js模式0二级title和desc与js模式1不一致的问题

修复pdfa函数定位的列表数据不对的问题；8号影院js模式0已完美

修复js模式0二级title和desc与js模式1不一致的问题
修复pdfa函数定位的列表数据不对的问题；8号影院js模式0已完美
a9b591aa · hjdhnx · 2502945d · a9b591aa · a9b591aa · a9b591aa
12 changed files
--- a/base/custom.conf
+++ b/base/custom.conf
@@ -30,7 +30,6 @@
 {"key": "dr_剧迷", "name": "剧迷(道长)", "type": 1, "api": "{{host}}/vod?rule=剧迷&ext=txt/js/tg/剧迷.js", "searchable": 2, "quickSearch": 0, "filterable": 0},
 {"key": "dr_大师兄影视", "name": "大师兄影视(道长)", "type": 1, "api": "{{host}}/vod?rule=大师兄影视&ext=txt/js/tg/大师兄影视.js", "searchable": 2, "quickSearch": 0, "filterable": 0},
 {"key": "dr_天空影视", "name": "天空影视(道长)", "type": 1, "api": "{{host}}/vod?rule=天空影视&ext=txt/js/tg/天空影视.js", "searchable": 2, "quickSearch": 0, "filterable": 0},
-{"key": "dr_完美看看", "name": "完美看看(道长)", "type": 1, "api": "{{host}}/vod?rule=完美看看&ext=txt/js/tg/完美看看.js", "searchable": 2, "quickSearch": 0, "filterable": 0},
 {"key": "dr_快云影院", "name": "快云影院(道长)", "type": 1, "api": "{{host}}/vod?rule=快云影院&ext=txt/js/tg/快云影院.js", "searchable": 2, "quickSearch": 0, "filterable": 0},
 {"key": "dr_爱看影视", "name": "爱看影视(道长)", "type": 1, "api": "{{host}}/vod?rule=爱看影视&ext=txt/js/tg/爱看影视.js", "searchable": 2, "quickSearch": 0, "filterable": 0},
 {"key": "dr_爱看电影", "name": "爱看电影(道长)", "type": 1, "api": "{{host}}/vod?rule=爱看电影&ext=txt/js/tg/爱看电影.js", "searchable": 2, "quickSearch": 0, "filterable": 0},

--- a/controllers/cms.py
+++ b/controllers/cms.py
@@ -864,8 +864,7 @@ class CMS:
            pdfa = jsp.pjfa if is_json else jsp.pdfa
            pd = jsp.pj if is_json else jsp.pd
            pq = jsp.pq
-            obj = {}
+            vod['vod_id'] = detailUrl
-            vod_name = ''
            if not html: # 没传递html参数接下来智能获取
                r = requests.get(url, headers=self.headers, timeout=self.timeout,verify=False)
                html = self.checkHtml(r)
@@ -874,16 +873,16 @@ class CMS:
                    html = json.loads(html)
            if p.get('title'):
                p1 = p['title'].split(';')
-                vod_name = pdfh(html, p1[0]).replace('\n', ' ')
+                vod['vod_name'] = pdfh(html, p1[0]).replace('\n', ' ').strip()
-                # title = '\n'.join([pdfh(html,i).replace('\n',' ') for i in p1])
+                vod['type_name'] = pdfh(html, p1[1]).replace('\n',' ').strip() if len(p1)>1 else ''
-                title = '\n'.join([','.join([pdfh(html, pp1).strip() for pp1 in i.split('+')]) for i in p1])
-                # print(title)
-                obj['title'] = title
            if p.get('desc'):
                try:
                    p1 = p['desc'].split(';')
-                    desc = '\n'.join([pdfh(html, i).replace('\n', ' ') for i in p1])
+                    vod['vod_remarks'] = pdfh(html, p1[0]).replace('\n', '').strip()
-                    obj['desc'] = desc
+                    vod['vod_year'] = pdfh(html, p1[1]).replace('\n', ' ').strip() if len(p1) > 1 else ''
+                    vod['vod_area'] = pdfh(html, p1[2]).replace('\n', ' ').strip() if len(p1) > 2 else ''
+                    vod['vod_actor'] = pdfh(html, p1[3]).replace('\n', ' ').strip() if len(p1) > 3 else ''
+                    vod['vod_director'] = pdfh(html, p1[4]).replace('\n', ' ').strip() if len(p1) > 4 else ''
                except:
                    pass
@@ -891,7 +890,7 @@ class CMS:
                p1 = p['content'].split(';')
                try:
                    content = '\n'.join([pdfh(html, i).replace('\n', ' ') for i in p1])
-                    obj['content'] = content
+                    vod['vod_content'] = content
                except:
                    pass
@@ -899,23 +898,10 @@ class CMS:
                p1 = p['img']
                try:
                    img = pd(html, p1)
-                    obj['img'] = img
+                    vod['vod_pic'] = img
                except Exception as e:
                    logger.info(f'二级图片定位失败,但不影响使用{e}')
-            vod = {
-                "vod_id": detailUrl,
-                "vod_name": vod_name,
-                "vod_pic": obj.get('img', ''),
-                "type_name": obj.get('title', ''),
-                "vod_year": "",
-                "vod_area": "",
-                "vod_remarks": obj.get('desc', ''),
-                "vod_actor": "",
-                "vod_director": "",
-                "vod_content": obj.get('content', '')
-            }
            vod_play_from = '$$$'
            playFrom = []
            init_flag = {'ctx':False}
@@ -966,19 +952,28 @@ class CMS:
                        vHeader = vHeader.to_list()
                    vodHeader = vHeader
                else:
-                    # print(p['tabs'].split(';')[0])
+                    tab_parse = p['tabs'].split(';')[0]
-                    vHeader = pdfa(html, p['tabs'].split(';')[0])
+                    # print('tab_parse:',tab_parse)
-                    # print(f'线路列表数:{len((vodHeader))}')
+                    vHeader = pdfa(html, tab_parse)
-                    # print(vodHeader)
+                    # print(vHeader)
+                    print(f'二级线路定位列表数:{len((vHeader))}')
+                    # print(vHeader[0].outerHtml())
+                    # print(vHeader[0].toString())
+                    # from lxml import etree
+                    # print(str(etree.tostring(vHeader[0], pretty_print=True), 'utf-8'))
+                    from lxml.html import tostring as html2str
+                    # print(html2str(vHeader[0].root).decode('utf-8'))
                    if not is_json:
                        for v in vHeader:
                            # 过滤排除掉线路标题
                            v_title = pq(v).text()
+                            # print(v_title)
                            if self.tab_exclude and jsp.test(self.tab_exclude, v_title):
                                continue
                            vodHeader.append(v_title)
                    else:
                        vodHeader = vHeader
+                    print(f'过滤后真实线路列表数:{len((vodHeader))} {vodHeader}')
            else:
                vodHeader = ['道长在线']
@@ -1010,9 +1005,11 @@ class CMS:
                else:
                    for i in range(len(vodHeader)):
                        tab_name = str(vodHeader[i])
+                        # print(tab_name)
                        tab_ext = p['tabs'].split(';')[1] if len(p['tabs'].split(';')) > 1 else ''
                        p1 = p['lists'].replace('#idv', tab_name).replace('#id', str(i))
                        tab_ext = tab_ext.replace('#idv', tab_name).replace('#id', str(i))
+                        # print(p1)
                        vodList = pdfa(html, p1)  # 1条线路的选集列表
                        # print(vodList)
                        # vodList = [pq(i).text()+'$'+pd(i,'a&&href') for i in vodList]  # 拼接成 名称$链接
@@ -1028,7 +1025,7 @@ class CMS:
                        vod_tab_list.append(vlist)
                    vod_play_url = vod_play_url.join(vod_tab_list)
-            print(vod_play_url)
+            # print(vod_play_url)
            vod['vod_play_from'] = vod_play_from
            # print(vod_play_from)
            vod['vod_play_url'] = vod_play_url

--- a/js/8号影院.js
+++ b/js/8号影院.js
+var rule={
+title:'8号影院',
+    host:'http://www.8hysw.com',
+    // homeUrl:'/',
+    url:'/frim/fyclass-fypage.html',
+    searchUrl:'/search.php?page=fypage&searchword=**&searchtype=',
+searchable:2,//是否启用全局搜索,
+quickSearch:0,//是否启用快速搜索,
+filterable:0,//是否启用分类筛选,
+class_name:'电影&电视剧&综艺&动漫&日韩剧&国产剧&欧美剧&港台剧',
+    class_url:'1&2&3&4&16&13&15&14',
+play_parse:true,
+lazy:'',
+limit:6,
+tab_exclude:'本周热门|最近更新',
+推荐:'.stui-pannel_bd;.stui-vodlist li;h4&&Text;.lazyload&&data-original;.text-right&&Text;a&&href',
+double:true, // 推荐内容是否双层定位
+一级:'.stui-vodlist.clearfix&&li;a&&title;.lazyload&&data-original;.text-right&&Text;a&&href',
+二级:{"title":"h1&&Text;.stui-content__detail&&p&&Text","img":".lazyload&&data-original","desc":".data:eq(0)&&Text;.data:eq(1)&&Text;.data:eq(2)&&Text;.data:eq(3)&&Text","content":".desc&&Text","tabs":".stui-pannel__head.bottom-line h3","lists":".stui-content__playlist:eq(#id) li"},
+ 搜索:muban.首图2.搜索2,
+}
\ No newline at end of file
--- a/js/KUBO影视[飞].js
+++ b/js/KUBO影视[飞].js
+var rule = {
+    title:'KUBO影视',
+    host:'https://123kubo.tv',
+    // homeUrl:'/',
+    url:'/show/fyclass/page/fypage.html',
+    searchUrl:'/search/page/fypage/wd/**.html',
+    searchable:2,//是否启用全局搜索,
+    quickSearch:0,//是否启用快速搜索,
+    filterable:0,//是否启用分类筛选,
+    headers:{//网站的请求头,完整支持所有的,常带ua和cookies
+        'User-Agent':'MOBILE_UA',
+        // "Cookie": "searchneed=ok"
+    },
+    class_name:'电影&电视剧&综艺&动漫',
+    class_url:'1&2&3&4',
+    //class_parse:'.myui-panel-box&&ul&&li;a&&Text;a&&href;/v/(.*)/',
+    play_parse:true,
+    lazy:'',
+    limit:6,
+    推荐:'ul.hl-vod-list;li;a&&title;.hl-item-thumb.hl-lazy&&data-original;.hl-pic-text&&Text;a&&href',
+    double:true, // 推荐内容是否双层定位
+    一级:'.hl-list-item;a&&title;.hl-item-thumb.hl-lazy&&data-original;.hl-pic-text&&Text;a&&href',
+    二级:{"title":".hl-item-thumb.hl-lazy&&title;.hl-full-box&&ul li:eq(6)&&Text","img":".hl-item-thumb.hl-lazy&&data-original","desc":".hl-full-box&&ul&&li:eq(1)&&Text;.hl-full-box&&ul&&li:eq(2)&&Text;.hl-full-box&&ul&&li:eq(3)&&Text","content":".hl-col-xs-12.blurb&&Text","tabs":".hl-plays-from:eq(0) a","lists":".hl-plays-list:eq(#id) li"},
+    搜索:'.hl-item-div;a&&title;.hl-item-thumb&&data-original;.hl-lc-1&&Text;a&&href;.text-muted:eq(-1)&&Text',
+}
--- a/js/TV蜂.js
+++ b/js/TV蜂.js
+var rule = {
+    title:'TV蜂',
+    host:'https://www.tvfeng.net',
+    // homeUrl:'/',
+    url:'/tvfenshow/fyclass--------fypage---.html',
+    searchUrl:'/tvfensearch/**----------fypage---.html',
+    searchable:2,//是否启用全局搜索,
+    quickSearch:0,//是否启用快速搜索,
+    filterable:0,//是否启用分类筛选,
+    headers:{//网站的请求头,完整支持所有的,常带ua和cookies
+        'User-Agent':'MOBILE_UA',
+        // "Cookie": "searchneed=ok"
+    },
+    class_name:'电影&电视剧&综艺&动漫',
+    class_url:'1&2&3&4',
+    play_parse:true,
+    lazy:'',
+    limit:6,
+    推荐:'.module-list;.module-items&&.module-item;a&&title;img&&data-src;.module-item-text&&Text;a&&href',
+    double:true, // 推荐内容是否双层定位
+    一级:'.module-items .module-item;a&&title;img&&data-src;.module-item-text&&Text;a&&href',
+    二级:{"title":"h1&&Text;.tag-link&&Text","img":".module-item-pic&&img&&data-src","desc":".video-info-items:eq(0)&&Text;.video-info-items:eq(3)&&Text;.video-info-items:eq(2)&&Text;.video-info-items:eq(1)&&Text","content":".vod_content&&Text","tabs":".module-tab-item","lists":".module-player-list:eq(#id)&&.scroll-content&&a"},
+    搜索:'.module-items .module-search-item;h3&&Text;img&&data-src;.video-serial&&Text;a&&href',
+}
--- a/txt/js/tg/完美看看.js
+++ b/txt/js/tg/完美看看.js
@@ -2,9 +2,8 @@ muban.首图2.二级.tabs = '.stui-pannel__head&&h3';
 var rule = Object.assign(muban.首图2,{
 title:'完美看看',
 host:'https://www.wanmeikk.film',
+class_parse:'.dropdown&&li;a&&Text;a&&href;.*/(.*?).html',
+cate_exclude:'消息|专题',
 url:'/category/fyclass-fypage.html',
-searchUrl:'/vodsearch/**-------------.html',
+searchUrl:'/so/-------------.html?wd=**&submit=',
-class_name:'电影&美剧&韩剧&日剧&国产剧&动漫',//静态分类名称拼接
+});
-class_url:'1&2&3&4&5&6',//静态分类标识拼接
\ No newline at end of file
-class_parse:'',
-});
--- a/js/尘落影视.js
+++ b/js/尘落影视.js
+var rule={
+    title:'尘落影视',
+    host:'http://v.ftixkrv.cn',
+    url:'/whole/fyclass_______0_addtime_fypage.html',   
+    searchUrl:'/?c=search&wd=**&sort=addtime&order=desc&page=fypage',   
+    searchable:2,//是否启用全局搜索,
+    quickSearch:0,//是否启用快速搜索,
+    filterable:0,//是否启用分类筛选,
+    headers:{//网站的请求头,完整支持所有的,常带ua和cookies
+        'User-Agent':'PC_UA',
+        // "Cookie": "searchneed=ok"
+    },
+    class_name:'电影&电视剧&综艺&动漫',
+    class_url:'1&2&4&3',
+    cate_exclude:'全网资源',
+    play_parse:true,
+    lazy:'',
+    limit:6,
+   推荐:'.movie-item-in;a&&title;img&&src;em&&Text;a&&href',  
+   一级:'.movie-item-in;a&&title;img&&src;em&&Text;a&&href',
+   二级:{"title":"h1&&Text;.table-striped tr:eq(2)&&Text","img":".img-thumbnail&&src","desc":";;.table-striped tr:eq(3)&&Text;.table-striped tr:eq(1)&&Text;.table-striped tr:eq(0)&&Text","content":".movie-introduce&&Text","tabs":".nav.nav-tabs li a","lists":".tab-pane.active:eq(#id) div a"},
+   搜索:'.movie-item-in;a&&title;img&&src;em&&Text;a&&href',
+}
\ No newline at end of file
--- a/js/抓饭体育.js
+++ b/js/抓饭体育.js
+// 道长 drpy仓库 https://gitcode.net/qq_32394351/dr_py
+// drpy安卓本地搭建说明 https://gitcode.net/qq_32394351/dr_py/-/blob/master/%E5%AE%89%E5%8D%93%E6%9C%AC%E5%9C%B0%E6%90%AD%E5%BB%BA%E8%AF%B4%E6%98%8E.md
+// Pluto Player官方TG https://t.me/PlutoPlayer
+// Pluto Player官方TG https://t.me/PlutoPlayerChannel
+var rule = {
+    title:'抓饭体育',
+    host:'https://www.zhuafan.tech',
+    url:'/sports-home/category/fyclass',
+    class_name:'全部&足球&篮球&羽乒&台球&棒球&户外&搏击&综合&棋盘&电竞&网球&排球&聊天&原声',
+    class_url:'all&Football&Basketball&Badminton&Billiards&Baseball&Outdoors&Wrestling&Others&Boardgame&Popular&Tennis&Volleyball&Chat&Acoustic',
+	homeUrl:'/sports-home/category/all',//网站的首页链接,用于分类获取和推荐获取
+    detailUrl:'https://m.zhuafan.tech/fyid',//二级详情拼接链接(json格式用)
+    searchUrl:'/live-search/search/query/data?keyword=**&page=fypage&num=&searchType=all&uid=null&from=pc',
+    searchable:2,
+    quickSearch:0,
+    headers:{ 
+        'User-Agent':'PC_UA'
+    },
+    limit:6,
+    timeout:5000,
+    play_parse:true,
+    lazy:'',
+    double:false,
+    推荐:'*',
+    一级:'json:data;cname;imageUrl;uname;id',
+    二级:'*',
+	搜索:'json:cObj.cList;*;*;*;_id',
+}
\ No newline at end of file
--- a/js/斗鱼直播.js
+++ b/js/斗鱼直播.js
+// 道长 drpy仓库 https://gitcode.net/qq_32394351/dr_py
+// drpy安卓本地搭建说明 https://gitcode.net/qq_32394351/dr_py/-/blob/master/%E5%AE%89%E5%8D%93%E6%9C%AC%E5%9C%B0%E6%90%AD%E5%BB%BA%E8%AF%B4%E6%98%8E.md
+// Pluto Player官方TG https://t.me/PlutoPlayer
+// Pluto Player官方TG https://t.me/PlutoPlayerChannel
+var rule = {
+    title:'斗鱼直播',
+    host:'https://www.douyu.com',
+    homeUrl:'/japi/weblist/apinc/recLabelList?',//网站的首页链接,用于分类获取和推荐获取
+	url:'/gapi/rkc/directory/mixList/fyclass/fypage',
+    class_name:'一起看&网游竞技&单机热游&手游休闲&娱乐天地&科技文化&语音互动&语音直播&正能量&颜值&音乐&舞蹈&二次元&户外&美食&互动交友&趣生活&数码科技&文化&科普&社会人文&汽车&纪录片&斗鱼购物&交友&电台&一起玩&音乐之声&正能量&英雄联盟&热门游戏&DOTA2&穿越火线&CFHD&DNF&炉石传说&CS:GO&逆战&lol云顶之弈&魔兽争霸&魔兽怀旧服&网易游戏&守望先锋&DOTA&魔兽世界&天涯明月刀&三国杀&主机游戏&永劫无间&生死狙击2&迷失ARK&艾尔登法环&逃离塔科夫&V Rising&海上狼人杀&怀旧游戏&王者荣耀&和平精英&火影忍者&LOL手游&金铲铲之战&重返帝国&COD手游&哈利波特：魔法觉醒&CF手游&欢乐斗地主&原神&天刀手游&棋牌娱乐&欢乐麻将&新游中心&QQ飞车&阴阳师&热门手游',
+    class_url:'2_208&1_1&1_15&1_9&1_2&1_11&1_20&1_18&1_13&2_201&2_175&2_1008&2_174&2_124&2_194&2_1555&2_1097&2_134&2_195&2_204&2_1162&2_136&2_514&2_1203&2_1221&2_1556&2_1575&2_910&2_250&2_1&2_270&2_3&2_33&2_1997&2_40&2_2&2_6&2_46&2_917&2_55&2_1055&2_3567&2_148&2_217&2_5&2_59&2_14&2_19&2_1227&2_1781&2_3528&2_3406&2_1024&2_3684&2_3556&2_26&2_181&2_350&2_196&2_1920&2_2556&2_2915&2_767&2_1192&2_178&2_416&2_1223&2_911&2_113&2_451&2_229&2_331&2_240&2_30',
+    detailUrl:'/fyid',//二级详情拼接链接(json格式用)
+    searchUrl:'/japi/search/api/searchShow?kw=**&page=fypage&pageSize=20',
+    searchable:2,
+    quickSearch:0,
+    headers:{
+		'User-Agent':'PC_UA'
+	},
+    timeout:5000,
+    limit:8,
+    play_parse:true,
+    lazy:'',
+    double:true,
+	推荐:'json:data.list;room;*;cover;*;*',
+	一级:'json:data.rl;rn;rs16;nn;rid',
+    二级:'*',
+    搜索:'json:data.relateShow;roomName;roomSrc;nickName;*',
+}
\ No newline at end of file
--- a/js/爱迪影视.js
+++ b/js/爱迪影视.js
+var rule={
+    title:'爱迪影视',
+    host:'https://aidi.tv',
+    url:'/show/fyclass--------fypage---.html',
+    searchUrl:'/vsearch/-------------.html?wd=**&submit=',
+    searchable:2,
+    quickSearch:0,
+    filterable:0,
+    headers:{ 'User-Agent':'MOBILE_UA', },
+    class_name:'电影&电视剧&综艺&动漫',
+    class_url:'dianying&lianxuju&zongyi&dongman',
+    tab_exclude:'app专用|VIP线路',
+    play_parse:true,
+    double:true,
+    推荐:'body .vodlist.vodlist_wi;li;a&&title;.vodlist_thumb.lazyload&&data-original;.pic-text&&Text;a&&href',   
+    一级:'.vodlist.vodlist_wi&&li;a&&title;.lazyload&&data-original;.pic-text&&Text;a&&href',
+    二级:{"title":"h2&&Text;.data:eq(1)&&Text","img":".lazyload&&data-original","desc":";.content_min li:eq(1)&&Text;;.content_min li:eq(2)&&Text;.content_min li:eq(3)&&Text;.data:eq(4)&&Text","content":".context.clearfix&&Text","tabs":".play_source_tab&&a","lists":".content_playlist:eq(#id) li"},
+    搜索:'.searchlist_img;a&&title;.vodlist_thumb.lazyload&&data-original;.pic-text&&Text;a&&href',
+}
\ No newline at end of file
--- a/js/蛋蛋赞.js
+++ b/js/蛋蛋赞.js
@@ -3,7 +3,7 @@ var rule={
    host:'https://www.dandanzan10.top',
    // homeUrl:'/',
    url:'/fyclass/index_fypage.html[/fyclass/index.html]',   
-    //searchUrl:'/search/**/',
+    searchUrl:'/so/**-**--.html',
    searchable:2,//是否启用全局搜索,
    quickSearch:0,//是否启用快速搜索,
    filterable:0,//是否启用分类筛选,
@@ -20,5 +20,5 @@ var rule={
    double:true, // 推荐内容是否双层定位
   二级:{"title":"h1&&Text;.product-excerpt:eq(2)&&Text","img":".thumb&&src","desc":";;.product-excerpt:eq(3)&&Text;.product-excerpt:eq(1)&&Text;.product-excerpt:eq(0)&&Text","content":".product-excerpt:eq(5)&&Text","tabs":".playlists dl dt","lists":".play-div-oa:eq(#id) li"},
-   搜索:'ul.img-list.clearfix&&li;a&&title;.lazyload&&data-original;.pic-text&&Text;a&&href',
+   搜索:'.lists-content&&ul&&li;*;*;*;*',
 }
\ No newline at end of file
--- a/utils/htmlParser.py
+++ b/utils/htmlParser.py
@@ -6,6 +6,7 @@
 import json
 from pyquery import PyQuery as pq
+from lxml import etree
 from urllib.parse import urljoin
 import re
 from jsonpath import jsonpath
@@ -22,7 +23,6 @@ class jsoup:
    def pdfh(self,html,parse:str,add_url=False):
        if not parse:
            return ''
        doc = pq(html)
        option = None
        if parse.find('&&') > -1:
@@ -66,10 +66,14 @@ class jsoup:
            # ret = doc(parse) # 下面注释的写法不对的
            # ret = ret.find(':first')
            # ret = ret.children(':first')
-            ret = str(ret)
+            # print(parse)
+            # ret = str(ret)
+            ret = ret.outerHtml()
        return ret
    def pdfa(self,html,parse:str):
+        # 看官方文档才能解决这个问题!!!
+        # https://pyquery.readthedocs.io/en/latest/api.html
        if not parse:
            return []
        if parse.find('&&') > -1:
@@ -78,8 +82,15 @@ class jsoup:
            parse = ' '.join([parse[i] if self.test(':eq|:lt|:gt', parse[i]) or i>=len(parse)-1 else f'{parse[i]}:eq(0)' for i in range(len(parse))])
        # print(f'pdfa:{parse}')
        doc = pq(html)
-        res = [str(item) for item in doc(parse).items()]
+        result = doc(parse)
+        # 节点转字符串
+        # print(str(etree.tostring(result[0], pretty_print=True), 'utf-8'))
+        # res = [item for item in result.items()]
+        res = [item.outerHtml() for item in result.items()] #  这个才是对的！！str() item str(etree.tostring 统统错误
+        # res = [str(item) for item in result.items()]
+        # res = [str(etree.tostring(item, pretty_print=True), 'utf-8') for item in result]
        # print(len(res),res)
+        # print('pdfa执行结果数:',len(res))
        return res
    def pd(self,html,parse:str):