diff --git a/js/version.txt b/js/version.txt index ae94c935ccb6cae94f9706e49b5c32325689be32..63c0213c09e36a5470a2d15df013c8bf652c03dd 100644 --- a/js/version.txt +++ b/js/version.txt @@ -1 +1 @@ -3.7.6 \ No newline at end of file +3.7.5beta10 \ No newline at end of file diff --git "a/py/\346\265\213\350\257\225pdf.py" "b/py/\346\265\213\350\257\225pdf.py" index 9ad060f382dab5864b4f8ef5a702ad9f2d0dec22..04386f93cbdee5817e87be314e32a58ffc96c477 100644 --- "a/py/\346\265\213\350\257\225pdf.py" +++ "b/py/\346\265\213\350\257\225pdf.py" @@ -29,8 +29,10 @@ def main(): print(a) a = jsp.pdfh(lis[0], 'a:eq(1) li img') print(a) - a = jsp.pd(lis[0], 'a&&li&&img&&src') - print(a) + a = jsp.pd(lis[0], 'a:eq(1)&&li&&img&&src') + print('src:',a) + a = jsp.pd(lis[0], 'a&&href') + print('href:', a) def main1(): url = 'https://www.lanhua.tv/voddetail/7420.html' @@ -89,7 +91,7 @@ def main3(): print(a) if __name__ == '__main__': - # main() + main() # main1() # main2() - main3() \ No newline at end of file + # main3() \ No newline at end of file diff --git a/utils/htmlParser.py b/utils/htmlParser.py index 9bf4fdb11f925328f8b36b38a7fa353ab20a736b..0f028f21aef59e4419ae4f6bc3178fb2b6196d4d 100644 --- a/utils/htmlParser.py +++ b/utils/htmlParser.py @@ -160,7 +160,7 @@ class jsoup: res = [item.outerHtml() for item in ret.items()] return res - def pdfh(self, html, parse: str, add_url=False): + def pdfh(self, html, parse: str, add_url=False, base_url: str = ''): if not all([html, parse]): return '' if PARSE_CACHE: @@ -210,13 +210,15 @@ class jsoup: if 'http' in ret: ret = ret[ret.find('http'):] else: - ret = urljoin(self.MY_URL, ret) + if not base_url: + base_url = self.MY_URL + ret = urljoin(base_url, ret) else: ret = ret.outerHtml() return ret - def pd(self, html, parse: str): - return self.pdfh(html, parse, True) + def pd(self, html, parse: str, base_url: str = ''): + return self.pdfh(html, parse, True, base_url) def pq(self, html: str): return pq(html)