提交 56c27bbc 编写于 作者: 如梦技术's avatar 如梦技术 🐛

mica-http 去掉 asDocument 方法,不强制依赖 jsoup,CssQuery 添加取值正则.

上级 141d123f
......@@ -1641,10 +1641,95 @@ public class $ {
* @return Bean
*/
@Nullable
public static <T> T readJson(@Nullable InputStream in, TypeReference<?> typeReference) {
public static <T> T readJsonAsJson(@Nullable InputStream in, TypeReference<?> typeReference) {
return JsonUtil.readValue(in, typeReference);
}
/**
 * Reads a JSON collection from a byte array into a list.
 * <p>
 * Thin facade: the actual parsing is delegated to {@code JsonUtil.readList}.
 *
 * @param content      JSON content as bytes, may be {@code null}
 * @param elementClass class of the list elements
 * @param <T>          element type
 * @return whatever {@code JsonUtil.readList} produces (declared {@code @Nullable})
 */
@Nullable
public static <T> List<T> readJsonAsList(@Nullable byte[] content, Class<T> elementClass) {
    final List<T> parsed = JsonUtil.readList(content, elementClass);
    return parsed;
}
/**
 * Reads a JSON collection from an input stream into a list.
 * <p>
 * Thin facade: the actual parsing is delegated to {@code JsonUtil.readList}.
 *
 * @param content      JSON content as an {@link InputStream}, may be {@code null}
 * @param elementClass class of the list elements
 * @param <T>          element type
 * @return whatever {@code JsonUtil.readList} produces (declared {@code @Nullable})
 */
@Nullable
public static <T> List<T> readJsonAsList(@Nullable InputStream content, Class<T> elementClass) {
    final List<T> parsed = JsonUtil.readList(content, elementClass);
    return parsed;
}
/**
 * Reads a JSON collection from a string into a list.
 * <p>
 * Thin facade: the actual parsing is delegated to {@code JsonUtil.readList}.
 *
 * @param content      JSON content as a string, may be {@code null}
 * @param elementClass class of the list elements
 * @param <T>          element type
 * @return whatever {@code JsonUtil.readList} produces (declared {@code @Nullable})
 */
@Nullable
public static <T> List<T> readJsonAsList(@Nullable String content, Class<T> elementClass) {
    final List<T> parsed = JsonUtil.readList(content, elementClass);
    return parsed;
}
/**
 * Reads a JSON object from a byte array into a map.
 * <p>
 * Thin facade: the actual parsing is delegated to {@code JsonUtil.readMap}.
 *
 * @param content    JSON content as bytes, may be {@code null}
 * @param keyClass   class of the map keys
 * @param valueClass class of the map values
 * @param <K>        key type
 * @param <V>        value type
 * @return whatever {@code JsonUtil.readMap} produces (declared {@code @Nullable})
 */
@Nullable
public static <K, V> Map<K, V> readJsonAsMap(@Nullable byte[] content, Class<?> keyClass, Class<?> valueClass) {
    final Map<K, V> parsed = JsonUtil.readMap(content, keyClass, valueClass);
    return parsed;
}
/**
 * Reads a JSON object from an input stream into a map.
 * <p>
 * Thin facade: the actual parsing is delegated to {@code JsonUtil.readMap}.
 *
 * @param content    JSON content as an {@link InputStream}, may be {@code null}
 * @param keyClass   class of the map keys
 * @param valueClass class of the map values
 * @param <K>        key type
 * @param <V>        value type
 * @return whatever {@code JsonUtil.readMap} produces (declared {@code @Nullable})
 */
@Nullable
public static <K, V> Map<K, V> readJsonAsMap(@Nullable InputStream content, Class<?> keyClass, Class<?> valueClass) {
    final Map<K, V> parsed = JsonUtil.readMap(content, keyClass, valueClass);
    return parsed;
}
/**
 * Reads a JSON object from a string into a map.
 * <p>
 * Thin facade: the actual parsing is delegated to {@code JsonUtil.readMap}.
 *
 * @param content    JSON content as a string, may be {@code null}
 * @param keyClass   class of the map keys
 * @param valueClass class of the map values
 * @param <K>        key type
 * @param <V>        value type
 * @return whatever {@code JsonUtil.readMap} produces (declared {@code @Nullable})
 */
@Nullable
public static <K, V> Map<K, V> readJsonAsMap(@Nullable String content, Class<?> keyClass, Class<?> valueClass) {
    final Map<K, V> parsed = JsonUtil.readMap(content, keyClass, valueClass);
    return parsed;
}
/**
* url 编码
*
......
......@@ -329,7 +329,7 @@ public class JsonUtil {
* @return 集合
*/
@Nullable
private static <T> List<T> readList(@Nullable String content, Class<T> elementClass) {
public static <T> List<T> readList(@Nullable String content, Class<T> elementClass) {
if (ObjectUtil.isEmpty(content)) {
return Collections.emptyList();
}
......
......@@ -11,11 +11,26 @@
</dependency>
```
jsoup 可选依赖,用来将 html 转换成 java Bean。
```xml
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>${jsoup.version}</version>
</dependency>
```
### gradle
```groovy
compile("net.dreamlu:mica-http:${version}")
```
jsoup 可选依赖,用来将 html 转换成 java Bean。
```groovy
compile("org.jsoup:jsoup:${jsoupVersion}")
```
### 使用文档
```java
// 设定全局日志级别 NONE,BASIC,HEADERS,BODY, 默认:NONE
......@@ -38,7 +53,7 @@ HttpRequest.get("https://www.baidu.com")
.asJsonNode(); // 结果集转换,注:如果网络异常等会直接抛出异常。
// 同类的方法有 asString、asBytes、asStream
// json 类响应:asJsonNode、asValue、asList、asMap,采用 jackson 处理
// xml、html响应:asDocument,asDomValue、asDomList采用的 jsoup 处理
// xml、html 响应:asDomValue、asDomList 采用 jsoup 处理,需要添加 jsoup 依赖。
// file 文件:toFile
// 同步
......@@ -72,6 +87,15 @@ HttpRequest.delete("https://www.baidu.com")
.execute(); // 异步最后发起请求
```
### DomMapper 工具
`DomMapper` 工具采用 `cglib` 动态代理和 `Jsoup` html 解析,不到 `200` 行代码实现了 `html` 转 `java Bean` 工具,爬虫必备。
主要方法有:
- DomMapper.readDocument
- DomMapper.readValue
- DomMapper.readList
### 示例代码1
```java
// 设置全局日志级别
......
......@@ -2,6 +2,6 @@ dependencies {
api project(":mica-core")
api "com.squareup.okhttp3:okhttp:${okhttpVersion}"
api "com.squareup.okhttp3:logging-interceptor:${okhttpVersion}"
api "org.jsoup:jsoup:${jsoupVersion}"
implementation "org.jsoup:jsoup:${jsoupVersion}"
implementation "org.springframework.retry:spring-retry"
}
......@@ -50,6 +50,13 @@ public @interface CssQuery {
*/
String attr() default "";
/**
* 正则,用于对 attr value 处理
*
* @return regex
*/
String regex() default "";
/**
* 嵌套的内部模型:默认 false
*
......
......@@ -35,6 +35,8 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
......@@ -70,7 +72,8 @@ public class CssQueryMethodInterceptor implements MethodInterceptor {
return proxyInner(cssQueryValue, method, returnType, isColl);
}
String attrName = annotation.attr();
Object proxyValue = proxyValue(cssQueryValue, attrName, returnType, isColl);
String valueRegex = annotation.regex();
Object proxyValue = proxyValue(cssQueryValue, attrName, valueRegex, returnType, isColl);
if (String.class.isAssignableFrom(returnType)) {
return proxyValue;
}
......@@ -80,7 +83,7 @@ public class CssQueryMethodInterceptor implements MethodInterceptor {
}
@Nullable
private Object proxyValue(String cssQueryValue, String attrName, Class<?> returnType, boolean isColl) {
private Object proxyValue(String cssQueryValue, String attrName, String valueRegex, Class<?> returnType, boolean isColl) {
if (isColl) {
Elements elements = Selector.select(cssQueryValue, element);
Collection<Object> valueList = newColl(returnType);
......@@ -88,7 +91,7 @@ public class CssQueryMethodInterceptor implements MethodInterceptor {
return valueList;
}
for (Element select : elements) {
String value = getValue(select, attrName);
String value = getValue(select, attrName, valueRegex);
if (value != null) {
valueList.add(value);
}
......@@ -96,7 +99,7 @@ public class CssQueryMethodInterceptor implements MethodInterceptor {
return valueList;
}
Element select = Selector.selectFirst(cssQueryValue, element);
return getValue(select, attrName);
return getValue(select, attrName, valueRegex);
}
private Object proxyInner(String cssQueryValue, Method method, Class<?> returnType, boolean isColl) {
......@@ -118,21 +121,31 @@ public class CssQueryMethodInterceptor implements MethodInterceptor {
}
@Nullable
private String getValue(@Nullable Element element, String attrName) {
private String getValue(@Nullable Element element, String attrName, String valueRegex) {
if (element == null) {
return null;
}
String attrValue;
if (StringUtil.isBlank(attrName)) {
return element.outerHtml();
attrValue = element.outerHtml();
} else if ("html".equalsIgnoreCase(attrName)) {
return element.html();
attrValue = element.html();
} else if ("text".equalsIgnoreCase(attrName)) {
return getText(element);
attrValue = getText(element);
} else if ("allText".equalsIgnoreCase(attrName)) {
return element.text();
attrValue = element.text();
} else {
return element.attr(attrName);
attrValue = element.attr(attrName);
}
if (StringUtil.isBlank(attrValue) || StringUtil.isBlank(valueRegex)) {
return attrValue;
}
// 处理正则表达式
Matcher matcher = Pattern.compile(valueRegex).matcher(attrValue);
if (matcher.find()) {
return matcher.group();
}
return null;
}
private String getText(Element element) {
......
......@@ -21,7 +21,6 @@ import com.fasterxml.jackson.databind.JsonNode;
import net.dreamlu.mica.core.utils.Exceptions;
import net.dreamlu.mica.core.utils.JsonUtil;
import okhttp3.*;
import org.jsoup.nodes.Document;
import javax.annotation.Nullable;
import java.io.File;
......@@ -184,7 +183,7 @@ public class HttpResponse implements ResponseSpec {
try {
return body.string();
} catch (IOException e) {
throw new RuntimeException(e);
throw Exceptions.unchecked(e);
}
}
......@@ -194,7 +193,7 @@ public class HttpResponse implements ResponseSpec {
try {
return body.bytes();
} catch (IOException e) {
throw new RuntimeException(e);
throw Exceptions.unchecked(e);
}
}
......@@ -206,50 +205,44 @@ public class HttpResponse implements ResponseSpec {
@Override
public JsonNode asJsonNode() {
return JsonUtil.readTree(this.asStream());
return JsonUtil.readTree(this.asString());
}
@Override
public <T> T asValue(Class<T> valueType) {
return JsonUtil.readValue(this.asStream(), valueType);
return JsonUtil.readValue(this.asString(), valueType);
}
@Override
public <T> T asValue(TypeReference<?> typeReference) {
return JsonUtil.readValue(this.asStream(), typeReference);
return JsonUtil.readValue(this.asString(), typeReference);
}
@Override
public <T> List<T> asList(Class<T> valueType) {
return JsonUtil.readList(this.asStream(), valueType);
return JsonUtil.readList(this.asString(), valueType);
}
@Override
public <K, V> Map<K, V> asMap(Class<?> keyClass, Class<?> valueType) {
return JsonUtil.readMap(this.asStream(), keyClass, valueType);
return JsonUtil.readMap(this.asString(), keyClass, valueType);
}
@Override
public <V> Map<String, V> asMap(Class<?> valueType) {
return JsonUtil.readMap(this.asStream(), String.class, valueType);
}
@Override
public Document asDocument() {
return DomMapper.readDocument(this.asStream());
return JsonUtil.readMap(this.asString(), String.class, valueType);
}
@Override
public <T> T asDomValue(Class<T> valueType) {
return DomMapper.readValue(this.asDocument(), valueType);
return DomMapper.readValue(this.asString(), valueType);
}
@Override
public <T> List<T> asDomList(Class<T> valueType) {
return DomMapper.readList(this.asDocument(), valueType);
return DomMapper.readList(this.asString(), valueType);
}
@Override
public void toFile(File file) {
toFile(file.toPath());
......@@ -260,7 +253,7 @@ public class HttpResponse implements ResponseSpec {
try {
Files.copy(this.asStream(), path);
} catch (IOException e) {
throw new RuntimeException(e);
throw Exceptions.unchecked(e);
}
}
......
......@@ -19,7 +19,6 @@ package net.dreamlu.mica.http;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.JsonNode;
import okhttp3.*;
import org.jsoup.nodes.Document;
import java.io.File;
import java.io.InputStream;
......@@ -145,13 +144,6 @@ public interface ResponseSpec {
*/
<V> Map<String, V> asMap(Class<?> valueType);
/**
* Returns body to jsoup Document.
*
* @return Document
*/
Document asDocument();
/**
* 将 xml、html 转成对象
*
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册