From 926a37bdbc933e7d36447c590499d4a9222e6a55 Mon Sep 17 00:00:00 2001 From: hjdhnx Date: Wed, 30 Nov 2022 15:47:01 +0800 Subject: [PATCH] =?UTF-8?q?=E6=97=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- "py/\346\265\213\350\257\225pdf.py" | 4 ++-- txt/py/py_bilimd.py | 12 ++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git "a/py/\346\265\213\350\257\225pdf.py" "b/py/\346\265\213\350\257\225pdf.py" index 76b3aa6..7974545 100644 --- "a/py/\346\265\213\350\257\225pdf.py" +++ "b/py/\346\265\213\350\257\225pdf.py" @@ -134,5 +134,5 @@ if __name__ == '__main__': # main() # main1() # main2() - # main3() - main4() \ No newline at end of file + main3() + # main4() \ No newline at end of file diff --git a/txt/py/py_bilimd.py b/txt/py/py_bilimd.py index 78f2e06..a4c77c8 100644 --- a/txt/py/py_bilimd.py +++ b/txt/py/py_bilimd.py @@ -4,12 +4,23 @@ import sys sys.path.append('..') from base.spider import Spider +# from base.htmlParser import jsoup import json from requests import session, utils import os import time import base64 +# def main3(): +# html = """ +#
+#

内容1我不获取的内容1我不获取的内容2内容2

+#
+# """ +# jsp = jsoup('https://www.cnblogs.com/lizhibk/p/8623543.html') +# a = jsp.pdfh(html, 'div p:eq(0)--span&&Text') +# print(a) +# return a class Spider(Spider): # 元类 默认的元类 type def getName(self): @@ -28,6 +39,7 @@ class Spider(Spider): # 元类 默认的元类 type def homeContent(self, filter): result = {} cateManual = { + # main3():"1", "番剧": "1", "国创": "4", "电影": "2", -- GitLab