完成pdfh系列函数

9358f192 · hjdhnx · a2913258 · 9358f192 · 9358f192 · 9358f192
Showing 5 changed files with 165 additions and 9 deletions

js/鸭奈飞.js js/鸭奈飞.js +6 -4

models/cms.py models/cms.py +103 -2

pycms.json pycms.json +3 -3

utils/config.py utils/config.py +11 -0

utils/htmlParser.py utils/htmlParser.py +42 -0

未找到文件。
--- a/js/鸭奈飞.js
+++ b/js/鸭奈飞.js
 var rule = {
    title:'鸭奈飞',
-    url:'https://yanetflix.com/vodshow/dianying--------fypage---.html',
+    url:'https://yanetflix.com/vodshow/fyclass--------fypage---.html', // 'fyclass' / 'fypage' are placeholders replaced with the category id and page number
+    // url:'https://yanetflix.com/vodshow/',
    searchUrl:'/vodsearch/**----------fypage---.html',
    ua:'MOBILE_UA',
-    class_name:'',
-    class_url:'测试',
-    一级:'',
+    class_name:'电影&连续剧&综艺&动漫', // '&'-separated category labels, paired by position with class_url
+    class_url:'dianying&lianxuju&zongyi&dongman', // '&'-separated category ids substituted for 'fyclass'
+    一级:'body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href', // ';'-separated parts: list selector; title; image; remarks; link
    二级:'',
    搜索:'',
 }
\ No newline at end of file
--- a/models/cms.py
+++ b/models/cms.py
@@ -3,11 +3,15 @@
 # File  : cms.py
 # Author: DaShenHan&道长-----先苦后甜，任凭晚风拂柳颜------
 # Date  : 2022/8/25
+import requests
+
 from utils.web import *
+from utils.config import config
+from utils.htmlParser import jsoup

 class CMS:
    def __init__(self,rule):
-        self.url = rule.get('url','')
+        self.url = rule.get('url','').rstrip('/')
        self.searchUrl = rule.get('searchUrl','')
        ua = rule.get('ua','')
        if ua == 'MOBILE_UA':
@@ -23,14 +27,111 @@ class CMS:
        self.二级 = rule.get('二级','')
        self.搜索 = rule.get('搜索','')
        self.title = rule.get('title','')
+        self.filter = rule.get('filter',[])
+        self.extend = rule.get('extend',[])

    def getName(self):
        """Return the title taken from the rule (``self.title``)."""
        return self.title

+    def homeContent(self):
+        # yanaifei
+        # https://yanetflix.com/vodtype/dianying.html
+        result = {}
+        class_names = self.class_name.split('&')
+        class_urls = self.class_url.split('&')
+        cnt = min(len(class_urls),len(class_names))
+        classes = []
+        for i in range(cnt):
+            classes.append({
+                'type_name': class_names[i],
+                'type_id': class_urls[i]
+            })
+        result['class'] = classes
+        if self.filter:
+            result['filters'] = config['filter']
+        return result
+
+    def homeVideoContent(self):
+        rsp = self.fetch("https://www.genmov.com/", headers=self.header)
+        root = self.html(rsp.text)
+        aList = root.xpath("//div[@class='module module-wrapper']//div[@class='module-item']")
+        videos = []
+        for a in aList:
+            name = a.xpath(".//div[@class='module-item-pic']/a/@title")[0]
+            pic = a.xpath(".//div[@class='module-item-pic']/img/@data-src")[0]
+            mark = a.xpath("./div[@class='module-item-text']/text()")[0]
+            sid = a.xpath(".//div[@class='module-item-pic']/a/@href")[0]
+            sid = self.regStr(sid, "/video/(\\S+).html")
+            videos.append({
+                "vod_id": sid,
+                "vod_name": name,
+                "vod_pic": pic,
+                "vod_remarks": mark
+            })
+        result = {
+            'list': videos
+        }
+        return result
+
+    def categoryContent(self, fyclass, fypage):
+        """
+        一级带分类的数据返回
+        :param fyclass: 分类标识
+        :param fypage: 页码
+        :return: cms一级数据
+        """
+
+        result = {}
+        # urlParams = ["", "", "", "", "", "", "", "", "", "", "", ""]
+        # urlParams = [""] * 12
+        # urlParams[0] = tid
+        # urlParams[8] = str(pg)
+        # for key in self.extend:
+        #     urlParams[int(key)] = self.extend[key]
+        # params = '-'.join(urlParams)
+        # print(params)
+        # url = self.url + '/{0}.html'.format(params)
+        pg = str(fypage)
+        url = self.url.replace('fyclass',fyclass).replace('fypage',fypage)
+        print(url)
+        headers = {'user-agent': self.ua}
+        r = requests.get(url, headers=headers)
+        p = self.一级.split(';')  # 解析
+        jsp = jsoup(self.url)
+        pdfh = jsp.pdfh
+        pdfa = jsp.pdfa
+        pd = jsp.pd
+
+        items = pdfa(r.text, p[0])
+        videos = []
+        for item in items:
+            # print(item)
+            title = pdfh(item, p[1])
+            img = pd(item, p[2])
+            desc = pdfh(item, p[3])
+            link = pd(item, p[4])
+            content = ''
+            # sid = self.regStr(sid, "/video/(\\S+).html")
+            videos.append({
+                "vod_id": link,
+                "vod_name": title,
+                "vod_pic": img,
+                "vod_remarks": desc,
+                "vod_content": content,
+            })
+        result['list'] = videos
+        result['page'] = pg
+        result['pagecount'] = 9999
+        result['limit'] = 90
+        result['total'] = 999999
+        return result
+
 if __name__ == '__main__':  # manual smoke test: load a rule file and exercise CMS
    from utils import parser
    js_path = f'js/鸭奈飞.js'
    ctx, js_code = parser.runJs(js_path)
    rule = ctx.eval('rule')  # read the `rule` object from the JS context
    cms = CMS(rule)
-    print(cms.title)
\ No newline at end of file
+    print(cms.title)
+    print(cms.homeContent())
+    cms.categoryContent('dianying',1)  # page 1 of the 'dianying' category
\ No newline at end of file
--- a/pycms.json
+++ b/pycms.json
@@ -3,7 +3,7 @@
    "name":"鸭奈飞",
    "type":4,
    "api":"http://127.0.0.1:9000/vod?rule=鸭奈飞",
-    "searchable":1,
-    "quickSearch":1,
-    "filterable":0
+    "searchable": 1,
+    "quickSearch": 1,
+    "filterable": 1
 }
\ No newline at end of file
--- a/utils/config.py
+++ b/utils/config.py
--- a/utils/htmlParser.py
+++ b/utils/htmlParser.py
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# File  : htmlParser.py
+# Author: DaShenHan&道长-----先苦后甜，任凭晚风拂柳颜------
+# Date  : 2022/8/25
+
+from pyquery import PyQuery as pq
+from urllib.parse import urljoin
+
+class jsoup:
+    def __init__(self,MY_URL=''):
+        self.MY_URL = MY_URL
+
+    def pdfh(self,html,parse,pd=False):
+        doc = pq(html)
+        option = None
+        if parse.find('&&') > -1:
+            option = parse.split('&&')[1]
+            parse = parse.split('&&')[0]
+
+        ret = doc(parse)
+        if option:
+            if option == 'Text':
+                ret = ret.text()
+            elif option == 'Html':
+                ret = ret.html()
+            else:
+                ret = ret.attr(option)
+                if pd and option in ['url','src','href','data-original']:
+                    ret = urljoin(self.MY_URL,ret)
+        else:
+            ret = ret('fisrt').html()
+        return ret
+
+    def pdfa(self,html,parse):
+        doc = pq(html)
+        # print(doc(parse)[0])
+        # return [item.html() for item in doc(parse).items()]
+        return [str(item) for item in doc(parse).items()]
+
+    def pd(self,html,parse):
+        return self.pdfh(html,parse,True)
\ No newline at end of file