From 3adf261f2e1aac4588b5e7c007ba6db77e991c6f Mon Sep 17 00:00:00 2001
From: hjdhnx <hjd124579>
Date: Sat, 3 Sep 2022 13:10:59 +0800
Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E4=BA=86=E4=B8=80=E5=A0=86?=
 =?UTF-8?q?=E4=B8=9C=E8=A5=BF?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app.py                              |  34 ++++++--
 classes/cms.py                      | 121 ++++++++++++++++++++--------
 "js/360\345\275\261\350\247\206.js" |  23 ++++++
 "js/\350\247\243\346\236\220.conf"  |  16 ++++
 "js/\350\247\243\346\236\220.txt"   |  13 ---
 requirements.txt                    |   3 +-
 templates/config.txt                |   5 +-
 txt/issue.txt                       |   2 +
 utils/htmlParser.py                 |  60 ++++++++++++--
 9 files changed, 213 insertions(+), 64 deletions(-)
 create mode 100644 "js/360\345\275\261\350\247\206.js"
 create mode 100644 "js/\350\247\243\346\236\220.conf"
 delete mode 100644 "js/\350\247\243\346\236\220.txt"
diff --git a/app.py b/app.py
index 62061dc..d6730f4 100644
--- a/app.py
+++ b/app.py
@@ -89,10 +89,14 @@ def forbidden():  # put application's code here
 @app.route('/index')
 def index():  # put application's code here
     # logger.info("进入了首页")
-    sup_port = app.config.get('SUP_PORT', 9001)
-    manager0 = ':'.join(getHost(0).split(':')[0:2]) + f':{sup_port}'
-    manager1 = ':'.join(getHost(1).split(':')[0:2]) + f':{sup_port}'
-    manager2 = ':'.join(getHost(2).split(':')[0:2]) + f':{sup_port}'
+    sup_port = app.config.get('SUP_PORT', False)
+    manager0 = ':'.join(getHost(0).split(':')[0:2])
+    manager1 = ':'.join(getHost(1).split(':')[0:2])
+    manager2 = ':'.join(getHost(2).split(':')[0:2]).replace('https','http')
+    if sup_port:
+        manager0 += f':{sup_port}'
+        manager1 += f':{sup_port}'
+        manager2 += f':{sup_port}'
     # print(manager1)
     # print(manager2)
     return render_template('index.html',getHost=getHost,manager0=manager0,manager1=manager1,manager2=manager2,is_linux=is_linux())
@@ -243,7 +247,12 @@ def vod():
     if play_url:  # 播放
         jxs = getJxs()
         play_url = cms.playContent(play_url,jxs)
-        return redirect(play_url)
+        if isinstance(play_url,str):
+            return redirect(play_url)
+        elif isinstance(play_url,dict):
+            return jsonify(play_url)
+        else:
+            return play_url
 
     if ac and t: # 一级
         data = cms.categoryContent(t,pg)
@@ -337,9 +346,20 @@ def getPics(path='images'):
     return pic_list
 
 def getJxs(path='js'):
-    with open(f'{path}/解析.txt',encoding='utf-8') as f:
+    with open(f'{path}/解析.conf',encoding='utf-8') as f:
         data = f.read().strip()
-    jxs = [{'name':dt.split(',')[0],'url':dt.split(',')[1]} for dt in data.split('\n')]
+    jxs = []
+    for line in data.split('\n'):
+        dt = [field.strip() for field in line.split(',')]
+        # skip blank lines, '#' comments and entries that lack a url
+        if dt[0].startswith('#') or len(dt) < 2 or not dt[1]:
+            continue
+        jxs.append({
+            'name':dt[0],
+            'url':dt[1],
+            # optional third field is the parser type; missing/invalid -> 0
+            'type':int(dt[2]) if len(dt) > 2 and dt[2].isdigit() else 0,
+        })
     # print(jxs)
     print(f'共计{len(jxs)}条解析')
     return jxs
diff --git a/classes/cms.py b/classes/cms.py
index d31319d..258dd34 100644
--- a/classes/cms.py
+++ b/classes/cms.py
@@ -186,6 +186,10 @@ class CMS:
         pdfh = jsp.pdfh
         pdfa = jsp.pdfa
         pd = jsp.pd
+        pjfh = jsp.pjfh
+        pjfa = jsp.pjfa
+        pj = jsp.pj
+
         pq = jsp.pq
         return pdfh,pdfa,pd,pq
 
@@ -409,9 +413,11 @@ class CMS:
         result = {}
         videos = []
         jsp = jsoup(self.homeUrl)
-        pdfh = jsp.pdfh
-        pdfa = jsp.pdfa
-        pd = jsp.pd
+        # a 'json:' prefix on the first rule field selects the jsonpath helpers over the pyquery ones
+        is_json = str(p[0]).startswith('json:')
+        pdfh = jsp.pjfh if is_json else jsp.pdfh
+        pdfa = jsp.pjfa if is_json else jsp.pdfa
+        pd = jsp.pj if is_json else jsp.pd
         try:
             if self.double:
                 items = pdfa(html, p[0])
@@ -422,7 +428,8 @@ class CMS:
                             title = pdfh(item2, p[2])
                             img = pd(item2, p[3])
                             desc = pdfh(item2, p[4])
-                            link = pd(item2, p[5])
+                            # every '+'-separated selector is read from the inner item2 (like title/img/desc) and joined with '$'
+                            link = '$'.join(pd(item2, p5) if not self.detailUrl else pdfh(item2, p5) for p5 in p[5].split('+'))
                             content = '' if len(p) < 7 else pdfh(item2, p[6])
                             videos.append({
                                 "vod_id": link,
@@ -436,13 +443,16 @@ class CMS:
                         except:
                             pass
             else:
-                items = pdfa(html, p[0])
+                items = pdfa(html, p[0].replace('json:',''))
+                # print(items)
                 for item in items:
                     try:
                         title = pdfh(item, p[1])
                         img = pd(item, p[2])
                         desc = pdfh(item, p[3])
-                        link = pd(item, p[4])
+                        # link = pd(item, p[4])
+                        links = [pd(item, p5) if not self.detailUrl else pdfh(item, p5) for p5 in p[4].split('+')]
+                        link = '$'.join(links)
                         content = '' if len(p) < 6 else pdfh(item, p[5])
                         videos.append({
                             "vod_id": link,
@@ -495,9 +505,10 @@ class CMS:
             return self.blank()
 
         jsp = jsoup(self.url)
-        pdfh = jsp.pdfh
-        pdfa = jsp.pdfa
-        pd = jsp.pd
+        is_json = str(p[0]).startswith('json:')
+        pdfh = jsp.pjfh if is_json else jsp.pdfh
+        pdfa = jsp.pjfa if is_json else jsp.pdfa
+        pd = jsp.pj if is_json else jsp.pd
         # print(pdfh(r.text,'body a.module-poster-item.module-item:eq(1)&&Text'))
         # print(pdfh(r.text,'body a.module-poster-item.module-item:eq(0)'))
         # print(pdfh(r.text,'body a.module-poster-item.module-item:first'))
@@ -508,28 +519,32 @@ class CMS:
             r = requests.get(url, headers=self.headers, timeout=self.timeout)
             r.encoding = self.encoding
             print(r.url)
-            html = r.text
+            # html = r.text
+            html = r.json() if is_json else r.text
             # print(html)
-            items = pdfa(html, p[0])
+            items = pdfa(html,p[0].replace('json:','',1))
         except:
             pass
+        # print(items)
         for item in items:
             # print(item)
             try:
                 title = pdfh(item, p[1])
                 img = pd(item, p[2])
                 desc = pdfh(item, p[3])
-                link = pd(item, p[4])
+                links = [pd(item, p4) if not self.detailUrl else pdfh(item, p4) for p4 in p[4].split('+')]
+                link = '$'.join(links)
                 content = '' if len(p) < 6 else pdfh(item, p[5])
                 # sid = self.regStr(sid, "/video/(\\S+).html")
                 videos.append({
-                    "vod_id": link,
+                    "vod_id": f'{fyclass}${link}' if self.detailUrl else link,# 分类,播放链接
                     "vod_name": title,
                     "vod_pic": img,
                     "vod_remarks": desc,
                     "vod_content": content,
                 })
-            except:
+            except Exception as e:
+                print(f'发生了错误:{e}')
                 pass
         result['list'] = videos
         result['page'] = fypage
@@ -540,11 +555,11 @@ class CMS:
         
         return result
 
-    def detailOneVod(self,id):
+    def detailOneVod(self,id,fyclass=''):
         detailUrl = str(id)
         vod = {}
         if not detailUrl.startswith('http'):
-            url = self.detailUrl.replace('fyid', detailUrl)
+            url = self.detailUrl.replace('fyid', detailUrl).replace('fyclass',fyclass)
         else:
             url = detailUrl
         # print(url)
@@ -563,20 +578,24 @@ class CMS:
                 return vod
 
             jsp = jsoup(self.url)
-            pdfh = jsp.pdfh
-            pdfa = jsp.pdfa
-            pd = jsp.pd
+
+            is_json = p.get('is_json',False) # 二级里加is_json参数
+            pdfh = jsp.pjfh if is_json else jsp.pdfh
+            pdfa = jsp.pjfa if is_json else jsp.pdfa
+            pd = jsp.pj if is_json else jsp.pd
             pq = jsp.pq
             obj = {}
             vod_name = ''
             r = requests.get(url, headers=self.headers, timeout=self.timeout)
             r.encoding = self.encoding
-            html = r.text
+            # html = r.text
+            html = r.json() if is_json else r.text
             # print(html)
             if p.get('title'):
                 p1 = p['title'].split(';')
                 vod_name = pdfh(html,p1[0]).replace('\n',' ')
-                title = '\n'.join([pdfh(html,i).replace('\n',' ') for i in p1])
+                # title = '\n'.join([pdfh(html,i).replace('\n',' ') for i in p1])
+                title = '\n'.join([','.join([pdfh(html, pp1).strip() for pp1 in i.split('+')]) for i in p1])
                 # print(title)
                 obj['title'] = title
             if p.get('desc'):
@@ -610,10 +629,11 @@ class CMS:
             vod_play_from = '$$$'
             playFrom = []
             if p.get('tabs'):
-                vodHeader = pdfa(html,p['tabs'])
+                vodHeader = pdfa(html,p['tabs'].split(';')[0])
                 # print(f'线路列表数:{len((vodHeader))}')
                 # print(vodHeader)
-                vodHeader = [pq(v).text() for v in vodHeader]
+                if not is_json:
+                    vodHeader = [pq(v).text() for v in vodHeader]
             else:
                 vodHeader = ['道长在线']
 
@@ -625,10 +645,20 @@ class CMS:
             vod_tab_list = []
             if p.get('lists'):
                 for i in range(len(vodHeader)):
-                   p1 = p['lists'].replace('#id',str(i))
+                   tab_name = str(vodHeader[i])
+                   # 'tabs' is optional: fall back to '' so rules that only define 'lists' do not raise KeyError
+                   tab_ext = ((p.get('tabs') or '').split(';') + [''])[1].replace('#idv',tab_name).replace('#id',str(i))
+                   p1 = p['lists'].replace('#idv',tab_name).replace('#id',str(i))
                    vodList = pdfa(html,p1) # 1条线路的选集列表
+                   # print(vodList)
                    # vodList = [pq(i).text()+'$'+pd(i,'a&&href') for i in vodList]  # 拼接成 名称$链接
-                   vodList = [pq(i).text()+'$'+self.play_url+pd(i,'a&&href') for i in vodList]  # 拼接成 名称$链接
+                   if self.play_parse: # 自动base64编码
+                       vodList = [(pdfh(html,tab_ext) if tab_ext else tab_name)+'$'+self.play_url+base64Encode(i) for i in vodList] if is_json else\
+                           [pq(i).text()+'$'+self.play_url+base64Encode(pd(i,'a&&href')) for i in vodList]  # 拼接成 名称$链接
+                   else:
+                       vodList = [(pdfh(html, tab_ext) if tab_ext else tab_name) + '$' + self.play_url + i for i in
+                                  vodList] if is_json else \
+                           [pq(i).text() + '$' + self.play_url + pd(i, 'a&&href') for i in vodList]  # 拼接成 名称$链接
                    vlist = '#'.join(vodList) # 拼多个选集
                    vod_tab_list.append(vlist)
                 vod_play_url = vod_play_url.join(vod_tab_list)
@@ -652,7 +682,12 @@ class CMS:
         obj_list = []
         try:
             for vod_url in array:
-                obj = thread_pool.submit(self.detailOneVod, vod_url)
+                vod_class = ''
+                if vod_url.find('$') > -1:
+                    tmp = vod_url.split('$')
+                    vod_class = tmp[0]
+                    vod_url = tmp[1]
+                obj = thread_pool.submit(self.detailOneVod, vod_url,vod_class)
                 obj_list.append(obj)
             thread_pool.shutdown(wait=True)  # 等待所有子线程并行完毕
             vod_list = [obj.result() for obj in obj_list]
@@ -680,17 +715,19 @@ class CMS:
         if len(p) < 5:
             return self.blank()
         jsp = jsoup(self.url)
-        pdfh = jsp.pdfh
-        pdfa = jsp.pdfa
-        pd = jsp.pd
+        is_json = str(p[0]).startswith('json:')
+        pdfh = jsp.pjfh if is_json else jsp.pdfh
+        pdfa = jsp.pjfa if is_json else jsp.pdfa
+        pd = jsp.pj if is_json else jsp.pd
         pq = jsp.pq
         videos = []
         try:
             r = requests.get(url, headers=self.headers,timeout=self.timeout)
             r.encoding = self.encoding
-            html = r.text
+            # html = r.text
+            html = r.json() if is_json else r.text
             # print(html)
-            if html.find('输入验证码') > -1:
+            if not is_json and html.find('输入验证码') > -1:
                 cookie = verifyCode(url,self.headers,self.timeout,self.retry_count,self.ocr_api)
                 # cookie = ''
                 if not cookie:
@@ -703,7 +740,7 @@ class CMS:
                 r.encoding = self.encoding
                 html = r.text
 
-            items = pdfa(html, p[0])
+            items = pdfa(html,p[0].replace('json:','',1))
             # print(items)
             videos = []
             for item in items:
@@ -712,7 +749,9 @@ class CMS:
                     title = pdfh(item, p[1])
                     img = pd(item, p[2])
                     desc = pdfh(item, p[3])
-                    link = pd(item, p[4])
+                    # link = '$'.join([pd(item, p4) for p4 in p[4].split('+')])
+                    links = [pd(item, p4) if not self.detailUrl else pdfh(item, p4) for p4 in p[4].split('+')]
+                    link = '$'.join(links)
                     content = '' if len(p) < 6 else pdfh(item, p[5])
                     # sid = self.regStr(sid, "/video/(\\S+).html")
                     videos.append({
@@ -736,6 +775,10 @@ class CMS:
         # logger.info('播放免嗅地址: ' + self.play_url)
         if not jxs:
             jxs = []
+        try:
+            play_url = baseDecode(play_url) # 自动base64解码
+        except:
+            pass
         if self.lazy:
             print(f'{play_url}->开始执行免嗅代码{type(self.lazy)}->{self.lazy}')
             t1 = time()
@@ -777,6 +820,10 @@ class CMS:
                     loader,_ = runJScode(jscode,ctx=ctx)
                     # print(loader.toString())
                     play_url = loader.eval('input')
+                    if isinstance(play_url,JsObjectWrapper):
+                        play_url = play_url.to_dict()
+                    # print(type(play_url))
+                    # print(play_url)
                     logger.info(f'js免嗅耗时:{get_interval(t1)}毫秒,播放地址:{play_url}')
             except Exception as e:
                 logger.info(f'免嗅耗时:{get_interval(t1)}毫秒,并发生错误:{e}')
@@ -786,12 +833,15 @@ class CMS:
             return play_url
 
 if __name__ == '__main__':
+    print(urljoin('https://api.web.360kan.com/v1/f',
+                  '//0img.hitv.com/preview/sp_images/2022/01/28/202201281528074643023.jpg'))
+    # exit()
     from utils import parser
     # js_path = f'js/玩偶姐姐.js'
     # js_path = f'js/555影视.js'
     with open('../js/模板.js', encoding='utf-8') as f:
         before = f.read()
-    js_path = f'js/vip影院.js'
+    js_path = f'js/360影视.js'
     ctx, js_code = parser.runJs(js_path,before=before)
     ruleDict = ctx.rule.to_dict()
     # lazy = ctx.eval('lazy')
@@ -807,4 +857,5 @@ if __name__ == '__main__':
     # print(cms.detailContent(1,['https://cokemv.me/voddetail/40573.html']))
     # cms.categoryContent('dianying',1)
     # print(cms.detailContent(['67391']))
-    print(cms.searchContent('斗罗大陆'))
\ No newline at end of file
+    # print(cms.searchContent('斗罗大陆'))
+    print(cms.searchContent('独行月球'))
\ No newline at end of file
diff --git "a/js/360\345\275\261\350\247\206.js" "b/js/360\345\275\261\350\247\206.js"
new file mode 100644
index 0000000..44427a8
--- /dev/null
+++ "b/js/360\345\275\261\350\247\206.js"
@@ -0,0 +1,23 @@
+var rule = {
+    title:'360影视',
+    host:'https://www.360kan.com',
+    homeUrl:'https://api.web.360kan.com/v1/rank?cat=2&size=9',
+    detailUrl:'https://api.web.360kan.com/v1/detail?cat=fyclass&id=fyid',
+    searchUrl:'https://api.so.360kan.com/index?force_v=1&kw=**&from=&pageno=fypage&v_ap=1&tab=all',
+    url:'https://api.web.360kan.com/v1/filter/list?catid=fyclass&rank=rankhot&cat=&year=&area=&act=&size=35&pageno=fypage&callback=',
+    headers:{
+        'User-Agent':'MOBILE_UA'
+    },
+    timeout:5000,
+    class_name:'电视剧&电影&综艺&动漫',
+    class_url:'2&1&3&4',
+    limit:5,
+    play_parse:true,
+    // play_parse (above): presumably surfaces as CMS.play_parse, which makes cms.py base64-encode each play link it builds
+    lazy:'js:input={parse: 1, playUrl: "", jx: 1, url: input}',
+    推荐:'json:data;title;cover;comment;cat+ent_id;description',
+    一级:'json:data.movies;title;cover;pubdate;id;description',
+    二级:{is_json:1,"title":"data.title;data.moviecategory[0]+data.moviecategory[1]","img":"data.cdncover","desc":"data.area[0];data.director[0]","content":"data.description","tabs":"data.playlink_sites;data.playlinksdetail.#idv.quality","lists":"data.playlinksdetail.#idv.default_url"},
+    // 二级 (above): is_json selects jsonpath parsing; #idv is replaced by the tab name and #id by its index; the part of tabs after ';' picks the episode label
+    搜索:'json:data.longData.rows;titleTxt;cover;score;cat_id+id;description',
+}
\ No newline at end of file
diff --git "a/js/\350\247\243\346\236\220.conf" "b/js/\350\247\243\346\236\220.conf"
new file mode 100644
index 0000000..539b79b
--- /dev/null
+++ "b/js/\350\247\243\346\236\220.conf"
@@ -0,0 +1,16 @@
+# 0123，对应，普通解析，json解析，并发解析，聚合解析,参数3不填默认0
+BT5V,https://rx.bt5v.com/json/jsonindex.php/?url=,1
+爱酷,https://cache.json.icu/home/api?type=ys&uid=292796&key=fnoryABDEFJNPQV269&url=,1
+# m3u8tv,https://jx.m3u8.tv/jiexi/?url=
+# 思古解析,https://jsap.attakids.com/?url=
+# 云解析,https://jx.ppflv.com/?url=
+# 云解析2,https://jx.aidouer.net/?url=
+# BL解析,https://vip.bljiex.cc/?v=
+# 虾米解析,https://jx.xmflv.com/?url=
+# 飞飞智能,https://y.9dan.cc/?v=
+# 左岸解析,https://jx.bozrc.com:4433/player/?url=
+# ok解析,https://okjx.cc/?url=
+# 8090解析,https://www.8090g.cn/?url=
+# ckplayer无广,https://www.ckplayer.vip/jiexi/?url=
+# 盘古无广,http://www.pangujiexi.cc/jiexi.php?url=
+# 江湖,http://jx.vipmv.co/?url=
\ No newline at end of file
diff --git "a/js/\350\247\243\346\236\220.txt" "b/js/\350\247\243\346\236\220.txt"
deleted file mode 100644
index 7ecbb2f..0000000
--- "a/js/\350\247\243\346\236\220.txt"
+++ /dev/null
@@ -1,13 +0,0 @@
-m3u8tv,https://jx.m3u8.tv/jiexi/?url=
-思古解析,https://jsap.attakids.com/?url=
-云解析,https://jx.ppflv.com/?url=
-云解析2,https://jx.aidouer.net/?url=
-BL解析,https://vip.bljiex.cc/?v=
-虾米解析,https://jx.xmflv.com/?url=
-飞飞智能,https://y.9dan.cc/?v=
-左岸解析,https://jx.bozrc.com:4433/player/?url=
-ok解析,https://okjx.cc/?url=
-8090解析,https://www.8090g.cn/?url=
-ckplayer无广,https://www.ckplayer.vip/jiexi/?url=
-盘古无广,http://www.pangujiexi.cc/jiexi.php?url=
-江湖,http://jx.vipmv.co/?url=
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index dbe593b..586aeb7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,4 +9,5 @@ gevent ; python_version < '3.9'
 gunicorn  ; python_version >= '3.6'
 supervisor ; sys_platform != 'win32'
 func_timeout
-easydict
\ No newline at end of file
+easydict
+jsonpath
\ No newline at end of file
diff --git a/templates/config.txt b/templates/config.txt
index 3a0f55a..1c18c9c 100644
--- a/templates/config.txt
+++ b/templates/config.txt
@@ -2,6 +2,7 @@
 {% if config.WALL_PAPER_ENABLE %}"wallpaper":"{{ host }}/pics",{% endif %}
 "dr_count": {{rules.list|length}},
 "mode": {{ mode }},
+"homepage":"https://gitcode.net/qq_32394351/dr_py",
 "sites": [{% for rule in rules.list %}{% if mode == 0 %}
 {
     "key":"dr_{{ rule.name }}",
@@ -38,9 +39,9 @@
 {
     "name": "{{ jx.name }}",
     "url": "{{ jx.url }}",
-    "type": 1,
+    "type": {{ jx.type }},
     "ext": {
-    "flag": ["qiyi", "爱奇艺", "奇艺", "qq", "腾讯", "youku", "优酷", "pptv", "PPTV", "letv", "乐视", "bilibili", "哔哩哔哩", "哔哩", "mgtv", "芒果","sohu", "xigua"],
+    "flag": ["qiyi","imgo","爱奇艺", "奇艺", "qq", "腾讯", "youku", "优酷", "pptv", "PPTV", "letv", "乐视", "bilibili", "哔哩哔哩", "哔哩", "mgtv", "芒果","sohu", "xigua"],
     "header": {
         "User-Agent": "Dart/2.14 (dart:io)"
     }
diff --git a/txt/issue.txt b/txt/issue.txt
index 3396ce9..d50d8c9 100644
--- a/txt/issue.txt
+++ b/txt/issue.txt
@@ -30,3 +30,5 @@ https://cuiqingcai.com/202232.html
 "lives":[{"group":"redirect","channels":[{"name":"直播","urls":["proxy://do=live&type=txt&ext={% if config.LIVE_MODE==0 %}{{base64Encode(host+'/lives')}}{% else %}{{ base64Encode('https://gitcode.net/qq_26898231/TVBox/-/raw/main/live/0830zb.txt')}}{% endif %}"]}]}],
 
 
+"homepage":"https://gitcode.net/qq_32394351/dr_py",
+"imgo",
\ No newline at end of file
diff --git a/utils/htmlParser.py b/utils/htmlParser.py
index 5088ea7..9d00e93 100644
--- a/utils/htmlParser.py
+++ b/utils/htmlParser.py
@@ -3,21 +3,23 @@
 # File  : htmlParser.py
 # Author: DaShenHan&道长-----先苦后甜，任凭晚风拂柳颜------
 # Date  : 2022/8/25
+import json
 
 from pyquery import PyQuery as pq
 from urllib.parse import urljoin
 import re
+from jsonpath import jsonpath
 
 class jsoup:
     def __init__(self,MY_URL=''):
         self.MY_URL = MY_URL
 
-    def test(self, text, string):
+    def test(self, text:str, string:str):
         searchObj = re.search(rf'{text}', string, re.M | re.I)
         test_ret = True if searchObj else False
         return test_ret
 
-    def pdfh(self,html,parse,pd=False):
+    def pdfh(self,html,parse:str,add_url=False):
         if not parse:
             return ''
         doc = pq(html)
@@ -40,7 +42,7 @@ class jsoup:
                 ret = ret.html()
             else:
                 ret = ret.attr(option)
-                if pd and option in ['url','src','href','data-original','data-src']:
+                if add_url and option in ['url','src','href','data-original','data-src']:
                     ret = urljoin(self.MY_URL,ret)
         else:
             # ret = doc(parse+':first')
@@ -52,7 +54,7 @@ class jsoup:
             ret = str(ret)
         return ret
 
-    def pdfa(self,html,parse):
+    def pdfa(self,html,parse:str):
         if not parse:
             return []
         if parse.find('&&') > -1:
@@ -64,12 +66,58 @@ class jsoup:
         # return [item.html() for item in doc(parse).items()]
         return [str(item) for item in doc(parse).items()]
 
-    def pd(self,html,parse):
+    def pd(self,html,parse:str):
         return self.pdfh(html,parse,True)
 
-    def pq(self,html):
+    def pq(self,html:str):
         return pq(html)
 
+    def pjfh(self,html,parse:str,add_url=False):
+        """Return the first jsonpath match for *parse* as a string ('' if none).
+
+        *html* is a JSON string or decoded object; add_url joins onto MY_URL.
+        """
+        if not parse:
+            return ''
+        if isinstance(html,str):
+            try:
+                html = json.loads(html)
+            except ValueError:
+                print('字符串转json失败')
+                return ''
+        if not parse.startswith('$.'):
+            parse = f'$.{parse}'
+        ret = jsonpath(html,parse)
+        first = ret[0] if isinstance(ret,list) else ret
+        ret = str(first) if first else ''
+        if add_url:
+            ret = urljoin(self.MY_URL, ret)
+        return ret
+
+    def pj(self, html, parse:str):
+        return self.pjfh(html, parse, True)
+
+    def pjfa(self,html,parse:str):
+        """Return the list selected by jsonpath *parse*.
+
+        *html* is a JSON string or decoded object; the result is always a list,
+        [] when *parse* is empty, the JSON is invalid or nothing matches.
+        """
+        if not parse:
+            return []
+        if isinstance(html,str):
+            try:
+                html = json.loads(html)
+            except ValueError:
+                return []
+        if not parse.startswith('$.'):
+            parse = f'$.{parse}'
+        ret = jsonpath(html,parse)
+        # a single match that is itself a list is the list the caller asked for: unwrap it
+        if isinstance(ret,list) and len(ret) == 1 and isinstance(ret[0],list):
+            ret = ret[0]
+        return ret or []
+
 if __name__ == '__main__':
     import requests
     from parsel import Selector
-- 
GitLab