Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
guluzhu
dr_py
提交
87a5059a
dr_py
项目概览
guluzhu
/
dr_py
与 Fork 源项目一致
Fork自
晚风拂柳颜 / dr_py
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
dr_py
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
87a5059a
编写于
10月 03, 2022
作者:
H
hjdhnx
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
增加线上测试
上级
c0d02cbb
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
236 addition
and
107 deletion
+236
-107
js/version.txt
js/version.txt
+1
-1
libs/drpy.js
libs/drpy.js
+94
-106
libs/parseTags.js
libs/parseTags.js
+141
-0
未找到文件。
js/version.txt
浏览文件 @
87a5059a
3.7.19beta3
\ No newline at end of file
3.7.19beta4
\ No newline at end of file
libs/drpy.js
浏览文件 @
87a5059a
...
...
@@ -2,12 +2,15 @@ import 'https://gitcode.net/qq_32394351/dr_py/-/raw/master/libs/es6py.js';
// import 'http://192.168.10.103:5705/libs/es6py.js';
import
cheerio
from
'
https://gitcode.net/qq_32394351/dr_py/-/raw/master/libs/cheerio.min.js
'
;
// import cheerio from 'http://192.168.10.103:5705/libs/cheerio.min.js';
import
{
parseTags
,
urljoin
,
stringify
}
from
'
https://gitcode.net/qq_32394351/dr_py/-/raw/master/libs/parseTags.js
'
;
// import {parseTags,urljoin,stringify} from 'http://192.168.10.103:5705/libs/parseTags.js';
import
'
https://gitcode.net/qq_32394351/dr_py/-/raw/master/libs/drT.js
'
;
// import 'http://192.168.10.103:5705/libs/drT.js';
import
muban
from
'
https://gitcode.net/qq_32394351/dr_py/-/raw/master/js/模板.js
'
;
import
pa
from
"
./cheerio.min
"
;
// import muban from 'http://192.168.10.103:5705/admin/view/模板.js';
// const key = 'drpy_zbk';
function
init_test
(){
...
...
@@ -28,7 +31,7 @@ let rule = {};
/** 已知问题记录
* 1.影魔的jinja2引擎不支持 {{fl}}对象直接渲染
* 2.import es6py.js但是里面的函数没有被装载进来.比如drpy规则报错setResult2 is undefined
*
*
3.无法重复导入cheerio(怎么解决drpy和parseTag里都需要导入cheerio的问题) 无法在副文件导入cheerio
*
* todo: jsp:{pdfa,pdfh,pd},json:{pdfa,pdfh,pd},jq:{pdfa,pdfh,pd}
* **/
...
...
@@ -48,7 +51,9 @@ const OCR_RETRY = 3;//ocr验证重试次数
// const OCR_API = 'http://dm.mudery.com:10000';//ocr在线识别接口
// const OCR_API = 'http://192.168.3.239:5705/parse/ocr';//ocr在线识别接口
const
OCR_API
=
'
http://cms.nokia.press/parse/ocr
'
;
//ocr在线识别接口
var
MY_URL
;
// 全局注入变量,pd函数需要
if
(
typeof
(
MY_URL
)
===
'
undefined
'
){
var
MY_URL
;
// 全局注入变量,pd函数需要
}
var
VODS
=
[];
// 一级或者搜索需要的数据列表
var
vod
=
{};
//二级用单个影片详情
var
RKEY
;
// 源的唯一标识
...
...
@@ -57,6 +62,10 @@ var print;
var
log
;
var
fetch_params
;
var
oheaders
;
var
_pdfh
;
var
_pdfa
;
var
_pd
;
const
jsp
=
parseTags
.
jsp
;
/*** 后台需要实现的java方法并注入到js中 ***/
...
...
@@ -167,63 +176,6 @@ function clearItem(k){
local
.
delete
(
RKEY
,
k
);
}
/**
* url拼接(暂未实现)
* @param fromPath 初始当前页面url
* @param nowPath 相对当前页面url
* @returns {*}
*/
function
urljoin
(
fromPath
,
nowPath
)
{
fromPath
=
fromPath
||
''
;
nowPath
=
nowPath
||
''
;
return
joinUrl
(
fromPath
,
nowPath
);
// try {
// // import Uri from './uri.min.js';
// // var Uri = require('./uri.min.js');
// // eval(request('https://cdn.bootcdn.net/ajax/libs/URI.js/1.19.11/URI.min.js'));
// // let new_uri = URI(nowPath, fromPath);
// let new_uri = Uri(nowPath, fromPath);
// new_uri = new_uri.toString();
// // console.log(new_uri);
// // return fromPath + nowPath
// return new_uri
// }
// catch (e) {
// console.log('urljoin发生错误:'+e.message);
// if(nowPath.startsWith('http')){
// return nowPath
// }if(nowPath.startsWith('/')){
// return getHome(fromPath)+nowPath
// }
// return fromPath+nowPath
// }
}
/**
* 重写pd方法-增加自动urljoin(没法重写,改个名继续骗)
* @param html
* @param parse
* @param uri
* @returns {*}
*/
function
pD
(
html
,
parse
,
uri
){
let
ret
=
pdfh
(
html
,
parse
);
if
(
typeof
(
uri
)
===
'
undefined
'
||!
uri
){
uri
=
''
;
}
if
(
/
(
url|src|href|data-original|data-src
)
$/
.
test
(
parse
)){
if
(
/http/
.
test
(
ret
)){
ret
=
ret
.
substr
(
ret
.
indexOf
(
'
http
'
));
}
else
{
ret
=
urljoin
(
MY_URL
,
ret
)
}
}
// MY_URL = getItem('MY_URL',MY_URL);
// console.log(`规则${RKEY}打印MY_URL:${MY_URL},uri:${uri}`);
return
ret
}
/*** js自封装的方法 ***/
/**
...
...
@@ -461,14 +413,15 @@ function homeVodParse(homeVodObj){
// setItem('MY_URL',MY_URL);
console
.
log
(
MY_URL
);
let
p
=
homeVodObj
.
推荐
;
if
(
!
p
){
if
(
!
p
||
typeof
(
p
)
!==
'
string
'
){
return
'
{}
'
}
if
(
typeof
(
p
)
===
'
string
'
&&
p
.
trim
().
startsWith
(
'
js:
'
)){
p
=
p
.
trim
();
if
(
p
.
startsWith
(
'
js:
'
)){
const
TYPE
=
'
home
'
;
var
input
=
MY_URL
;
const
HOST
=
rule
.
host
;
eval
(
p
.
trim
().
replace
(
'
js:
'
,
''
));
eval
(
p
.
replace
(
'
js:
'
,
''
));
d
=
VODS
;
}
else
{
p
=
p
.
split
(
'
;
'
);
...
...
@@ -477,30 +430,34 @@ function homeVodParse(homeVodObj){
}
else
if
(
homeVodObj
.
double
&&
p
.
length
<
6
)
{
return
'
{}
'
}
let
_ps
=
parseTags
.
getParse
(
p
[
0
]);
_pdfa
=
_ps
.
pdfa
;
_pdfh
=
_ps
.
pdfh
;
_pd
=
_ps
.
pd
;
p
[
0
]
=
p
[
0
].
replace
(
/^
(
jsp:|json:|jq:
)
/
,
''
);
// print(p[0]);
let
html
=
getHtml
(
MY_URL
);
try
{
console
.
log
(
'
double:
'
+
homeVodObj
.
double
);
if
(
homeVodObj
.
double
)
{
p
[
0
]
=
p
[
0
].
trim
().
startsWith
(
'
json:
'
)
?
p
[
0
].
replace
(
'
json:
'
,
''
)
:
p
[
0
];
// console.log(p[0]);
let
items
=
pdfa
(
html
,
p
[
0
]);
let
items
=
_pdfa
(
html
,
p
[
0
]);
// console.log(items.length);
for
(
let
item
of
items
)
{
// console.log(p[1]);
let
items2
=
pdfa
(
item
,
p
[
1
]);
let
items2
=
_
pdfa
(
item
,
p
[
1
]);
// console.log(items2.length);
for
(
let
item2
of
items2
)
{
try
{
let
title
=
pdfh
(
item2
,
p
[
2
]);
let
title
=
_
pdfh
(
item2
,
p
[
2
]);
let
img
=
''
;
try
{
img
=
pD
(
item2
,
p
[
3
])
img
=
_pd
(
item2
,
p
[
3
])
}
catch
(
e
)
{
}
let
desc
=
pdfh
(
item2
,
p
[
4
]);
let
desc
=
_
pdfh
(
item2
,
p
[
4
]);
let
links
=
[];
for
(
let
p5
of
p
[
5
].
split
(
'
+
'
))
{
let
link
=
!
homeVodObj
.
detailUrl
?
pD
(
item2
,
p5
,
MY_URL
)
:
pdfh
(
item2
,
p5
);
let
link
=
!
homeVodObj
.
detailUrl
?
_pd
(
item2
,
p5
,
MY_URL
)
:
_
pdfh
(
item2
,
p5
);
links
.
push
(
link
);
}
let
vod
=
{
...
...
@@ -509,9 +466,10 @@ function homeVodParse(homeVodObj){
vod_remarks
:
desc
,
vod_id
:
links
.
join
(
'
$
'
)
};
// print(vod);
d
.
push
(
vod
);
}
catch
(
e
)
{
console
.
log
(
'
首页列表处理发生错误:
'
+
e
.
message
);
}
}
...
...
@@ -521,21 +479,20 @@ function homeVodParse(homeVodObj){
}
else
{
p
[
0
]
=
p
[
0
].
trim
().
startsWith
(
'
json:
'
)
?
p
[
0
].
replace
(
'
json:
'
,
''
)
:
p
[
0
];
let
items
=
pdfa
(
html
,
p
[
0
]);
let
items
=
_pdfa
(
html
,
p
[
0
]);
for
(
let
item
of
items
)
{
try
{
let
title
=
pdfh
(
item
,
p
[
1
]);
let
title
=
_
pdfh
(
item
,
p
[
1
]);
let
img
=
''
;
try
{
img
=
pD
(
item
,
p
[
2
],
MY_URL
);
img
=
_pd
(
item
,
p
[
2
],
MY_URL
);
}
catch
(
e
)
{
}
let
desc
=
pdfh
(
item
,
p
[
3
]);
let
desc
=
_
pdfh
(
item
,
p
[
3
]);
let
links
=
[];
for
(
let
p5
of
p
[
4
].
split
(
'
+
'
))
{
let
link
=
!
homeVodObj
.
detailUrl
?
pD
(
item
,
p5
,
MY_URL
)
:
pdfh
(
item
,
p5
);
let
link
=
!
homeVodObj
.
detailUrl
?
_pd
(
item
,
p5
,
MY_URL
)
:
_
pdfh
(
item
,
p5
);
links
.
push
(
link
);
}
let
vod
=
{
...
...
@@ -571,6 +528,9 @@ function homeVodParse(homeVodObj){
*/
function
categoryParse
(
cateObj
)
{
let
p
=
cateObj
.
一级
;
if
(
!
p
||
typeof
(
p
)
!==
'
string
'
){
return
'
{}
'
}
let
d
=
[];
// let url = cateObj.url.replaceAll('fyclass', cateObj.tid).replaceAll('fypage', cateObj.pg);
let
url
=
cateObj
.
url
.
replaceAll
(
'
fyclass
'
,
cateObj
.
tid
);
...
...
@@ -611,8 +571,8 @@ function categoryParse(cateObj) {
MY_URL
=
url
;
// setItem('MY_URL',MY_URL);
console
.
log
(
MY_URL
);
if
(
typeof
(
p
)
===
'
string
'
&&
p
.
trim
()
.
startsWith
(
'
js:
'
)){
p
=
p
.
trim
();
if
(
p
.
startsWith
(
'
js:
'
)){
const
MY_CATE
=
cateObj
.
tid
;
const
MY_FL
=
cateObj
.
extend
;
const
TYPE
=
'
cate
'
;
...
...
@@ -625,16 +585,21 @@ function categoryParse(cateObj) {
if
(
p
.
length
<
5
)
{
return
'
{}
'
}
let
_ps
=
parseTags
.
getParse
(
p
[
0
]);
_pdfa
=
_ps
.
pdfa
;
_pdfh
=
_ps
.
pdfh
;
_pd
=
_ps
.
pd
;
p
[
0
]
=
p
[
0
].
replace
(
/^
(
jsp:|json:|jq:
)
/
,
''
);
try
{
let
html
=
getHtml
(
MY_URL
);
if
(
html
)
{
let
list
=
pdfa
(
html
,
p
[
0
]);
let
list
=
_
pdfa
(
html
,
p
[
0
]);
list
.
forEach
(
it
=>
{
d
.
push
({
'
vod_id
'
:
pD
(
it
,
p
[
4
],
MY_URL
),
'
vod_name
'
:
pdfh
(
it
,
p
[
1
]),
'
vod_pic
'
:
pD
(
it
,
p
[
2
],
MY_URL
),
'
vod_remarks
'
:
pdfh
(
it
,
p
[
3
]),
'
vod_id
'
:
_pd
(
it
,
p
[
4
],
MY_URL
),
'
vod_name
'
:
_
pdfh
(
it
,
p
[
1
]),
'
vod_pic
'
:
_pd
(
it
,
p
[
2
],
MY_URL
),
'
vod_remarks
'
:
_
pdfh
(
it
,
p
[
3
]),
});
});
}
...
...
@@ -663,11 +628,15 @@ function searchParse(searchObj) {
return
'
{}
'
}
let
p
=
searchObj
.
搜索
===
'
*
'
&&
rule
.
一级
?
rule
.
一级
:
searchObj
.
搜索
;
if
(
!
p
||
typeof
(
p
)
!==
'
string
'
){
return
'
{}
'
}
p
=
p
.
trim
();
let
url
=
searchObj
.
searchUrl
.
replaceAll
(
'
**
'
,
searchObj
.
wd
).
replaceAll
(
'
fypage
'
,
searchObj
.
pg
);
MY_URL
=
url
;
console
.
log
(
MY_URL
);
// setItem('MY_URL',MY_URL);
if
(
typeof
(
p
)
===
'
string
'
&&
p
.
trim
()
.
startsWith
(
'
js:
'
)){
if
(
p
.
startsWith
(
'
js:
'
)){
const
TYPE
=
'
search
'
;
const
MY_PAGE
=
searchObj
.
pg
;
const
KEY
=
searchObj
.
wd
;
...
...
@@ -680,6 +649,11 @@ function searchParse(searchObj) {
if
(
p
.
length
<
5
)
{
return
'
{}
'
}
let
_ps
=
parseTags
.
getParse
(
p
[
0
]);
_pdfa
=
_ps
.
pdfa
;
_pdfh
=
_ps
.
pdfh
;
_pd
=
_ps
.
pd
;
p
[
0
]
=
p
[
0
].
replace
(
/^
(
jsp:|json:|jq:
)
/
,
''
);
try
{
let
html
=
getHtml
(
MY_URL
);
if
(
html
)
{
...
...
@@ -698,16 +672,16 @@ function searchParse(searchObj) {
console
.
log
(
'
搜索结果源码未包含关键字,疑似搜索失败,正为您打印结果源码
'
);
console
.
log
(
html
);
}
let
list
=
pdfa
(
html
,
p
[
0
]);
let
list
=
_
pdfa
(
html
,
p
[
0
]);
list
.
forEach
(
it
=>
{
let
ob
=
{
'
vod_id
'
:
pD
(
it
,
p
[
4
],
MY_URL
),
'
vod_name
'
:
pdfh
(
it
,
p
[
1
]),
'
vod_pic
'
:
pD
(
it
,
p
[
2
],
MY_URL
),
'
vod_remarks
'
:
pdfh
(
it
,
p
[
3
]),
'
vod_id
'
:
_pd
(
it
,
p
[
4
],
MY_URL
),
'
vod_name
'
:
_
pdfh
(
it
,
p
[
1
]),
'
vod_pic
'
:
_pd
(
it
,
p
[
2
],
MY_URL
),
'
vod_remarks
'
:
_
pdfh
(
it
,
p
[
3
]),
};
if
(
p
.
length
>
5
&&
p
[
5
])
{
ob
.
vod_content
=
pdfh
(
it
,
p
[
5
]);
ob
.
vod_content
=
_
pdfh
(
it
,
p
[
5
]);
}
d
.
push
(
ob
);
});
...
...
@@ -767,20 +741,33 @@ function detailParse(detailObj){
if
(
!
html
){
html
=
getHtml
(
MY_URL
);
}
let
_ps
;
if
(
p
.
is_json
){
_ps
=
parseTags
.
json
;
}
else
if
(
p
.
is_jsp
){
_ps
=
parseTags
.
jsp
;
}
else
if
(
p
.
is_jq
){
_ps
=
parseTags
.
jq
;
}
else
{
_ps
=
parseTags
.
jq
;
}
_pdfa
=
_ps
.
pdfa
;
_pdfh
=
_ps
.
pdfh
;
_pd
=
_ps
.
pd
;
if
(
p
.
title
){
let
p1
=
p
.
title
.
split
(
'
;
'
);
vod
.
vod_name
=
pdfh
(
html
,
p1
[
0
]).
replaceAll
(
'
\n
'
,
'
'
).
trim
();
let
type_name
=
p1
.
length
>
1
?
pdfh
(
html
,
p1
[
1
]).
replaceAll
(
'
\n
'
,
'
'
).
trim
():
''
;
vod
.
vod_name
=
_
pdfh
(
html
,
p1
[
0
]).
replaceAll
(
'
\n
'
,
'
'
).
trim
();
let
type_name
=
p1
.
length
>
1
?
_
pdfh
(
html
,
p1
[
1
]).
replaceAll
(
'
\n
'
,
'
'
).
trim
():
''
;
vod
.
type_name
=
type_name
||
vod
.
type_name
;
}
if
(
p
.
desc
){
try
{
let
p1
=
p
.
desc
.
split
(
'
;
'
);
vod
.
vod_remarks
=
pdfh
(
html
,
p1
[
0
]).
replaceAll
(
'
\n
'
,
'
'
).
trim
();
vod
.
vod_year
=
p1
.
length
>
1
?
pdfh
(
html
,
p1
[
1
]).
replaceAll
(
'
\n
'
,
'
'
).
trim
():
''
;
vod
.
vod_area
=
p1
.
length
>
2
?
pdfh
(
html
,
p1
[
2
]).
replaceAll
(
'
\n
'
,
'
'
).
trim
():
''
;
vod
.
vod_actor
=
p1
.
length
>
3
?
pdfh
(
html
,
p1
[
3
]).
replaceAll
(
'
\n
'
,
'
'
).
trim
():
''
;
vod
.
vod_director
=
p1
.
length
>
4
?
pdfh
(
html
,
p1
[
4
]).
replaceAll
(
'
\n
'
,
'
'
).
trim
():
''
;
vod
.
vod_remarks
=
_
pdfh
(
html
,
p1
[
0
]).
replaceAll
(
'
\n
'
,
'
'
).
trim
();
vod
.
vod_year
=
p1
.
length
>
1
?
_
pdfh
(
html
,
p1
[
1
]).
replaceAll
(
'
\n
'
,
'
'
).
trim
():
''
;
vod
.
vod_area
=
p1
.
length
>
2
?
_
pdfh
(
html
,
p1
[
2
]).
replaceAll
(
'
\n
'
,
'
'
).
trim
():
''
;
vod
.
vod_actor
=
p1
.
length
>
3
?
_
pdfh
(
html
,
p1
[
3
]).
replaceAll
(
'
\n
'
,
'
'
).
trim
():
''
;
vod
.
vod_director
=
p1
.
length
>
4
?
_
pdfh
(
html
,
p1
[
4
]).
replaceAll
(
'
\n
'
,
'
'
).
trim
():
''
;
}
catch
(
e
)
{
...
...
@@ -789,14 +776,14 @@ function detailParse(detailObj){
if
(
p
.
content
){
try
{
let
p1
=
p
.
content
.
split
(
'
;
'
);
vod
.
vod_content
=
pdfh
(
html
,
p1
[
0
]).
replaceAll
(
'
\n
'
,
'
'
).
trim
();
vod
.
vod_content
=
_
pdfh
(
html
,
p1
[
0
]).
replaceAll
(
'
\n
'
,
'
'
).
trim
();
}
catch
(
e
)
{}
}
if
(
p
.
img
){
try
{
let
p1
=
p
.
img
.
split
(
'
;
'
);
vod
.
vod_pic
=
pD
(
html
,
p1
[
0
],
MY_URL
);
vod
.
vod_pic
=
_pd
(
html
,
p1
[
0
],
MY_URL
);
}
catch
(
e
)
{}
}
...
...
@@ -811,11 +798,11 @@ function detailParse(detailObj){
if
(
p
.
tabs
){
let
p_tab
=
p
.
tabs
.
split
(
'
;
'
)[
0
];
console
.
log
(
p_tab
);
let
vHeader
=
pdfa
(
html
,
p_tab
);
let
vHeader
=
_
pdfa
(
html
,
p_tab
);
console
.
log
(
vHeader
.
length
);
for
(
let
v
of
vHeader
){
let
v_title
=
pdfh
(
v
,
'
body&&Text
'
);
let
v_title
=
_
pdfh
(
v
,
'
body&&Text
'
);
console
.
log
(
v_title
);
if
(
tab_exclude
&&
(
new
RegExp
(
tab_exclude
)).
test
(
v_title
)){
continue
;
...
...
@@ -841,16 +828,17 @@ function detailParse(detailObj){
// console.log(html);
let
vodList
=
[];
try
{
vodList
=
pdfa
(
html
,
p1
);
vodList
=
_
pdfa
(
html
,
p1
);
console
.
log
(
'
len(vodList):
'
+
vodList
.
length
);
}
catch
(
e
)
{
// console.log(e.message);
}
let
new_vod_list
=
[];
let
tabName
=
tab_ext
?
pdfh
(
html
,
tab_ext
):
tab_name
;
let
tabName
=
tab_ext
?
_
pdfh
(
html
,
tab_ext
):
tab_name
;
console
.
log
(
tabName
);
vodList
.
forEach
(
it
=>
{
new_vod_list
.
push
(
pdfh
(
it
,
'
body&&Text
'
)
+
'
$
'
+
pD
(
it
,
'
a&&href
'
,
MY_URL
));
// new_vod_list.push(_pdfh(it,'body&&Text')+'$'+_pd(it,'a&&href',MY_URL));
new_vod_list
.
push
(
_pdfh
(
it
,
'
Text
'
)
+
'
$
'
+
_pd
(
it
,
'
a&&href
'
,
MY_URL
));
});
let
vlist
=
new_vod_list
.
join
(
'
#
'
);
vod_tab_list
.
push
(
vlist
);
...
...
libs/parseTags.js
0 → 100644
浏览文件 @
87a5059a
// import cheerio from 'https://gitcode.net/qq_32394351/dr_py/-/raw/master/libs/cheerio.min.js';
// Make sure the name MY_URL is declared; the urljoin(MY_URL, ...) calls further
// down in this file read it as the base URL for resolving relative links.
// NOTE(review): this file uses `export`, so when it is loaded as an ES module the
// hoisted `var` below is module-local and is a different binding from the MY_URL
// that drpy.js assigns — confirm how the host runtime shares this variable.
if
(
typeof
(
MY_URL
)
===
'
undefined
'
){
var
MY_URL
;
// Globally injected variable, required by the pd function
}
/**
 * Join a URL found on a page onto the URL of that page.
 *
 * Falsy arguments are replaced with '' and the pair is handed to `joinUrl`,
 * a helper that is not defined in this file (presumably injected by the
 * host application — verify against the runtime).
 *
 * @param fromPath URL of the current page (the join base)
 * @param nowPath  URL relative to the current page
 * @returns {*} whatever `joinUrl(fromPath, nowPath)` returns
 */
export function urljoin(fromPath, nowPath) {
    const base = fromPath ? fromPath : '';
    const relative = nowPath ? nowPath : '';
    return joinUrl(base, relative);
}
/**
 * Variant of `pd` that turns link-like results into absolute URLs.
 *
 * The raw value is obtained from `pdfh(html, parse)` (`pdfh` is provided
 * outside this file). Only when `parse` ends with url, src, href,
 * data-original or data-src is the value post-processed:
 *   - if it contains "http", everything before the first "http" is dropped;
 *   - otherwise it is joined onto the page URL with `urljoin`.
 * Any other selector returns the raw value unchanged.
 *
 * @param html  markup (or node) handed to `pdfh`
 * @param parse selector expression
 * @param uri   page URL used as the join base; when omitted or falsy the
 *              file-level MY_URL is used instead
 * @returns {*} the extracted value, made absolute for link-like selectors
 */
export function pD(html, parse, uri) {
    const ret = pdfh(html, parse);
    if (!/(url|src|href|data-original|data-src)$/.test(parse)) {
        return ret;
    }
    // Honour the caller-supplied page URL; it used to be accepted but ignored.
    const base = uri || MY_URL;
    // Non-string results cannot contain an embedded absolute URL, so they go
    // straight to urljoin instead of crashing on a missing string method.
    const httpAt = typeof ret === 'string' ? ret.indexOf('http') : -1;
    if (httpAt >= 0) {
        // e.g. "url(http://host/a.png" -> "http://host/a.png"
        return ret.slice(httpAt);
    }
    return urljoin(base, ret);
}
/**
 * Return the JSON value to query: strings are parsed, anything else is
 * assumed to be parsed already. Invalid JSON text makes JSON.parse throw.
 */
function jsonRoot(html) {
    return typeof html === 'string' ? JSON.parse(html) : html;
}

/** Trim a path expression and make sure it starts with the "$." root prefix. */
function jsonPath(expr) {
    const path = expr.trim();
    return path.startsWith('$.') ? path : '$.' + path;
}

/**
 * Parser families selectable by the prefix of a rule's first segment.
 *
 * Every family exposes the same three functions:
 *   pdfa(html, parse) -> list of matches
 *   pdfh(html, parse) -> single value
 *   pd(html, parse, uri) -> single value resolved against the page URL
 *
 * `jsp` and `jq` delegate to `pdfh` / `pdfa` (provided outside this file) and
 * to `pD`. `json` queries JSON data through `cheerio.jp`, which is also not
 * defined in this file (presumably a JSONPath helper — verify).
 *
 * Callers store these functions in plain variables and call them detached,
 * so none of them may depend on `this`.
 */
export var parseTags = {
    jsp: {
        pdfh: pdfh,
        pdfa: pdfa,
        pd: pD,
    },
    json: {
        /**
         * Return the first non-empty value among the "||"-separated paths.
         *
         * @param html  JSON text or an already parsed value
         * @param parse one or more paths separated by "||"; "$." is optional
         * @returns {string} the value converted to a string, or '' when no path yields a truthy value
         */
        pdfh(html, parse) {
            if (!parse || !parse.trim()) {
                return '';
            }
            const data = jsonRoot(html);
            for (const alternative of parse.split('||')) {
                if (!alternative.trim()) {
                    continue;
                }
                // Every alternative gets the "$." prefix, not only the first one.
                let ret = cheerio.jp(jsonPath(alternative), data);
                ret = Array.isArray(ret) ? ret[0] || '' : ret || '';
                if (ret && typeof ret !== 'string') {
                    ret = ret.toString();
                }
                if (ret) {
                    return ret;
                }
            }
            return '';
        },
        /**
         * Return the list selected by a path.
         *
         * @param html  JSON text or an already parsed value
         * @param parse path expression; "$." is optional
         * @returns {Array} the matches; a result that is a single nested array
         *                  is unwrapped; an empty selector yields []
         */
        pdfa(html, parse) {
            if (!parse || !parse.trim()) {
                // Always an array so callers can use forEach / for...of safely.
                return [];
            }
            const data = jsonRoot(html);
            const ret = cheerio.jp(jsonPath(parse), data);
            if (Array.isArray(ret) && ret.length === 1 && Array.isArray(ret[0])) {
                return ret[0];
            }
            return ret || [];
        },
        /**
         * Like pdfh, but a non-empty result is joined onto the page URL.
         *
         * @param html  JSON text or an already parsed value
         * @param parse path expression(s), as for pdfh
         * @param uri   page URL used as the join base; falls back to MY_URL
         * @returns {*} the joined URL, or '' when nothing was found
         */
        pd(html, parse, uri) {
            // Looked up through parseTags, not `this`: callers invoke pd detached.
            const ret = parseTags.json.pdfh(html, parse);
            return ret ? urljoin(uri || MY_URL, ret) : ret;
        },
    },
    jq: {
        pdfh: pdfh,
        pdfa: pdfa,
        pd: pD,
    },
    /**
     * Pick the parser family for a rule segment that does not start with "js:".
     *
     * @param p0 first rule segment, optionally prefixed with "jsp:", "json:" or "jq:"
     * @returns {{pdfh: Function, pdfa: Function, pd: Function}} the matching family; `jq` is the default
     */
    getParse(p0) {
        const tag = typeof p0 === 'string' ? p0 : '';
        if (tag.startsWith('jsp:')) {
            return parseTags.jsp;
        }
        if (tag.startsWith('json:')) {
            return parseTags.json;
        }
        // "jq:" and unprefixed segments both use the jq family.
        return parseTags.jq;
    },
};
/**
 * Re-export of the built-in JSON.stringify under a short name, so modules
 * importing from this file can serialise values without touching JSON directly.
 */
export var stringify = JSON.stringify;
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录