diff --git a/utils/HtmlParser.java b/utils/HtmlParser.java
new file mode 100644
index 0000000000000000000000000000000000000000..b8477908a9a74d61186b4ee62bf592afc6c064f3
--- /dev/null
+++ b/utils/HtmlParser.java
@@ -0,0 +1,294 @@
+package com.github.tvbox.osc.util.js;
+
+import android.text.TextUtils;
+
+import com.quickjs.android.JSUtils;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class HtmlParser {
+ private static final Pattern NOADD_INDEX = Pattern.compile(":eq|:lt|:gt|:first|:last|^body$|^#"); // selector tokens matching this never get an automatic :eq(0) appended
+ private static final Pattern URLJOIN_ATTR = Pattern.compile("(url|src|href|-original|-src|-play|-url)$", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); // attribute names whose values are resolved against add_url
+ private static final Pattern p = Pattern.compile("url\\((.*?)\\)", Pattern.DOTALL | Pattern.MULTILINE); // captures the argument of url(...) inside a style attribute value
+ private static String pdfh_html = ""; // last HTML string parsed by parseDomForUrl
+ private static Document pdfh_doc; // Document parsed from pdfh_html, reused while the same HTML is passed again
+ private static String pdfa_html = ""; // last HTML string parsed by parseDomForArray / parseDomForList
+ private static Document pdfa_doc; // Document parsed from pdfa_html, reused while the same HTML is passed again
+
+ /**
+  * Resolves {@code child} against {@code parent} with {@link URL#URL(URL, String)}.
+  * @param parent base URL; when JSUtils.isEmpty(parent) is true, child is returned untouched
+  * @param child  URL reference to resolve against parent
+  * @return the resolved URL in external form; parent itself when URL construction throws
+  *         MalformedURLException (its stack trace is printed)
+  */
+ public static String joinUrl(String parent, String child) {
+     if (JSUtils.isEmpty(parent)) {
+         return child;
+     }
+     try {
+         return new URL(new URL(parent), child).toExternalForm();
+     } catch (MalformedURLException e) {
+         e.printStackTrace();
+         return parent;
+     }
+ }
+
+ public static class Painfo { // one selector token split into its parts by getParseInfo
+     public String nparse_rule; // selector text handed to jsoup select()
+     public int nparse_index; // index parsed from ":eq(n)"; stays 0 when the token has none or it is not a number
+     public List<String> excludes; // selectors listed after "--"; typed as String because they are passed to select(String); null when absent
+ }
+
+ /**
+  * Splits one selector token into its jsoup selector, optional :eq() index and optional "--" exclusions.
+  * Exclusions strip matched elements (by tag, id, ...) and may be chained: {@code div:eq(1)--span--a}.
+  * The token is cut at the first ":eq" instead of the first ':' so selectors that contain other colons
+  * (":has(a)", ":not(.x)", "[href^=http://]") keep their full text and their real index.
+  * @param nparse a single selector token
+  * @return parsed parts; nparse_index is 0 if the index is absent or not an integer, excludes is null without "--"
+  */
+ private static Painfo getParseInfo(String nparse) {
+     Painfo painfo = new Painfo();
+     String selector = nparse; // defaults to the whole token
+     String indexPart = null; // text after the ':' of ":eq", e.g. "eq(2)" or "eq(2)--span"
+     int eqAt = nparse.indexOf(":eq");
+     if (eqAt >= 0) {
+         selector = nparse.substring(0, eqAt);
+         indexPart = nparse.substring(eqAt + 1);
+     }
+     String[] parts = null; // exclusions follow either the selector or the index
+     if (selector.contains("--")) {
+         parts = selector.split("--");
+         selector = parts.length > 0 ? parts[0] : "";
+     } else if (indexPart != null && indexPart.contains("--")) {
+         parts = indexPart.split("--");
+         indexPart = parts.length > 0 ? parts[0] : "";
+     }
+     if (parts != null) { // every part after the first is an exclusion selector
+         painfo.excludes = new ArrayList<>(Arrays.asList(parts).subList(Math.min(1, parts.length), parts.length));
+     }
+     painfo.nparse_rule = selector;
+     if (indexPart != null) {
+         int colon = indexPart.indexOf(':'); // ignore any pseudo-selector trailing the index
+         String digits = (colon >= 0 ? indexPart.substring(0, colon) : indexPart).replace("eq(", "").replace(")", "");
+         try {
+             painfo.nparse_index = Integer.parseInt(digits);
+         } catch (NumberFormatException ignored) {
+             painfo.nparse_index = 0; // malformed index falls back to the first match
+         }
+     }
+     return painfo;
+ }
+
+ /** Reports whether a selector token already pins its position: it contains ":eq", ":lt", ":gt", ":first" or
+  *  ":last", or it starts with "body" or "#". Both prefixes are always tested; previously a token containing
+  *  "body" elsewhere (e.g. "#tbody") returned false before its leading "#" was ever looked at. */
+ public static boolean isIndex(String str) {
+     if (JSUtils.isEmpty(str)) {
+         return false;
+     }
+     for (String marker : new String[]{":eq", ":lt", ":gt", ":first", ":last"}) {
+         if (str.contains(marker)) {
+             return true;
+         }
+     }
+     return str.startsWith("body") || str.startsWith("#");
+ }
+
+ /**
+  * Reports whether an attribute name looks like it carries a URL: true when it contains
+  * "url", "src", "href", "-original" or "-play" (case-sensitive); false when JSUtils.isEmpty(str).
+  */
+ public static boolean isUrl(String str) {
+     if (JSUtils.isEmpty(str)) {
+         return false;
+     }
+     return str.contains("url") || str.contains("src") || str.contains("href")
+             || str.contains("-original") || str.contains("-play");
+ }
+
+ /**
+  * Converts a Hiker-style rule ("a&&b&&c") into a space-separated selector chain, adding the ":eq" index
+  * that Hiker rules leave implicit.
+  * <p>
+  * ":eq(0)" is appended to every "&&" segment whose last space-separated token does not match
+  * NOADD_INDEX. The final segment is only indexed when {@code first} is true; a rule without "&&" is
+  * handled as a single, final segment. A segment consisting only of spaces is treated as an empty token
+  * rather than throwing ArrayIndexOutOfBoundsException.
+  * <p>
+  * Examples: {@code ("ul&&li", false)} gives {@code "ul:eq(0) li"}, {@code ("ul&&li", true)} gives
+  * {@code "ul:eq(0) li:eq(0)"}, and {@code ("#list&&a:eq(2)", true)} gives {@code "#list a:eq(2)"}.
+  *
+  * @param parse rule text, optionally containing "&&" separators
+  * @param first whether the final segment is also limited to its first match
+  * @return the segments joined with single spaces
+  */
+ private static String parseHikerToJq(String parse, boolean first) {
+     // split() on a rule without "&&" yields the rule itself, so one loop covers both forms.
+     String[] segments = parse.split("&&");
+     StringBuilder jq = new StringBuilder();
+     for (int i = 0; i < segments.length; i++) {
+         String[] tokens = segments[i].split(" ");
+         // Only the last token of a segment decides whether an index is already present.
+         String lastToken = tokens.length == 0 ? "" : tokens[tokens.length - 1];
+         boolean isFinal = i == segments.length - 1;
+         // The final segment keeps all of its matches unless the caller asked for the first one only.
+         boolean needsIndex = !NOADD_INDEX.matcher(lastToken).find() && (first || !isFinal);
+         // Segments are separated by single spaces, which is what callers later split the chain on.
+         if (i > 0) {
+             jq.append(' ');
+         }
+         jq.append(segments[i]);
+         if (needsIndex) {
+             jq.append(":eq(0)");
+         }
+     }
+     return jq.toString();
+ }
+
+ /**
+  * Evaluates a Hiker-style rule against {@code html} and returns a single string.
+  * The rule is a "&&"-separated selector chain whose last segment names what to extract: "Text", "Html" or
+  * an attribute name. A rule without "&&" has no such option and yields the outer HTML of its match. The
+  * rules "Text", "body&&Text", "Html" and "body&&Html" return the text or HTML of the whole document.
+  * For an attribute whose name contains "style" and whose value contains "url(", only the url() argument is
+  * returned, trimmed and without the single or double quotes CSS allows around it; the quotes used to be
+  * left in the result. When {@code add_url} is non-empty and the attribute name matches URLJOIN_ATTR, the
+  * value is cut to start at its first "http", or resolved against add_url by joinUrl if it has none.
+  * @param html    HTML source to query; its parsed Document is cached statically and reused for equal html
+  * @param rule    Hiker-style rule
+  * @param add_url base URL for relative links; empty to skip URL resolution
+  * @return the extracted value, or "" as soon as one selector of the chain matches nothing
+  */
+ public static String parseDomForUrl(String html, String rule, String add_url) {
+     if (!pdfh_html.equals(html)) {
+         pdfh_html = html;
+         pdfh_doc = Jsoup.parse(html);
+     }
+     Document doc = pdfh_doc;
+     if (rule.equals("body&&Text") || rule.equals("Text")) {
+         return doc.text();
+     }
+     if (rule.equals("body&&Html") || rule.equals("Html")) {
+         return doc.html();
+     }
+     String option = "";
+     if (rule.contains("&&")) {
+         String[] segments = rule.split("&&"); // last segment = extraction option, the rest = selector chain
+         option = segments[segments.length - 1];
+         rule = TextUtils.join("&&", Arrays.asList(segments).subList(0, segments.length - 1));
+     }
+     Elements ret = new Elements();
+     for (String nparse : parseHikerToJq(rule, true).split(" ")) {
+         ret = parseOneRule(doc, nparse, ret);
+         if (ret.isEmpty()) {
+             return "";
+         }
+     }
+     if (!JSUtils.isNotEmpty(option)) {
+         return ret.outerHtml();
+     }
+     if (option.equals("Text")) {
+         return ret.text();
+     }
+     if (option.equals("Html")) {
+         return ret.html();
+     }
+     String result = ret.attr(option);
+     if (option.toLowerCase().contains("style") && result.contains("url(")) {
+         Matcher m = p.matcher(result);
+         if (m.find()) {
+             // url('x') and url("x") are valid CSS, so strip padding and one quote from each end.
+             result = m.group(1).trim().replaceAll("^['\"]|['\"]$", "");
+         }
+     }
+     if (JSUtils.isNotEmpty(result) && JSUtils.isNotEmpty(add_url) && URLJOIN_ATTR.matcher(option).find()) {
+         result = result.contains("http") ? result.substring(result.indexOf("http")) : joinUrl(add_url, result);
+     }
+     return result;
+ }
+
+ /**
+  * Evaluates a "&&"-separated selector rule against {@code html} and returns the outer HTML of every
+  * element matched by the last selector (no ":eq(0)" is appended to it); empty when any selector in the
+  * chain matches nothing. The parsed Document is cached in static fields shared with parseDomForList.
+  */
+ public static List<String> parseDomForArray(String html, String rule) {
+     if (!pdfa_html.equals(html)) {
+         pdfa_html = html;
+         pdfa_doc = Jsoup.parse(html);
+     }
+     Elements ret = new Elements();
+     for (String nparse : parseHikerToJq(rule, false).split(" ")) {
+         ret = parseOneRule(pdfa_doc, nparse, ret);
+         if (ret.isEmpty()) {
+             return new ArrayList<>();
+         }
+     }
+     List<String> eleHtml = new ArrayList<>(ret.size());
+     for (Element element : ret) {
+         eleHtml.add(element.outerHtml());
+     }
+     return eleHtml;
+ }
+
+ /**
+  * Applies one selector token: to the document when {@code ret} is empty, otherwise to the current matches.
+  * A negative :eq() index counts from the end of the matches; if it is still negative after that, the
+  * result is empty instead of a negative index being handed to Elements.eq. Exclusions listed after "--"
+  * are removed from a clone of the matches.
+  * @param ret matches produced by the previous token; empty for the first token of a chain
+  * @return the narrowed matches; possibly empty
+  */
+ private static Elements parseOneRule(Document doc, String nparse, Elements ret) {
+     Painfo painfo = getParseInfo(nparse);
+     ret = ret.isEmpty() ? doc.select(painfo.nparse_rule) : ret.select(painfo.nparse_rule);
+     if (nparse.contains(":eq")) {
+         int index = painfo.nparse_index < 0 ? ret.size() + painfo.nparse_index : painfo.nparse_index;
+         ret = index < 0 ? new Elements() : ret.eq(index);
+     }
+     if (painfo.excludes != null && !ret.isEmpty()) {
+         ret = ret.clone(); // work on a copy so the removal below does not alter the cached Document
+         for (Object exclude : painfo.excludes) {
+             // String.valueOf compiles whether excludes is a raw List or a List<String>; select() needs a String.
+             ret.select(String.valueOf(exclude)).remove();
+         }
+     }
+     return ret;
+ }
+
+ /** Builds one "text$url" entry per element matched by {@code p1}: the element's outer HTML is evaluated by
+  *  parseDomForUrl with {@code list_text} (result trimmed) and with {@code list_url} (using {@code add_url}).
+  *  Returns an empty list when any selector of p1 matches nothing. */
+ public static List<String> parseDomForList(String html, String p1, String list_text, String list_url, String add_url) {
+     if (!pdfa_html.equals(html)) {
+         pdfa_html = html;
+         pdfa_doc = Jsoup.parse(html);
+     }
+     Elements ret = new Elements();
+     for (String nparse : parseHikerToJq(p1, false).split(" ")) {
+         ret = parseOneRule(pdfa_doc, nparse, ret);
+         if (ret.isEmpty()) {
+             return new ArrayList<>();
+         }
+     }
+     List<String> new_vod_list = new ArrayList<>(ret.size());
+     for (Element element : ret) {
+         String it = element.outerHtml();
+         new_vod_list.add(parseDomForUrl(it, list_text, "").trim() + '$' + parseDomForUrl(it, list_url, add_url));
+     }
+     return new_vod_list;
+ }
+}