fix:爬取数据

上级 72d39140
......@@ -15,7 +15,7 @@
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>7.12.0</version>
<version>7.3.0</version>
<exclusions>
<exclusion>
<groupId>org.elasticsearch</groupId>
......@@ -26,7 +26,7 @@
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
<version>7.12.0</version>
<version>7.3.0</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
......
......@@ -5,6 +5,7 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import java.io.IOException;
......@@ -24,9 +25,9 @@ public class ContentController {
* @return
* @throws Exception
*/
@GetMapping("/parse/{keyword}")
@GetMapping("/parse")
@ResponseBody
public Boolean parse(@PathVariable String keyword) throws Exception {
public Boolean parse(@RequestParam(value = "keyword") String keyword) throws Exception {
return contentService.parseContent(keyword);
}
......
......@@ -29,22 +29,28 @@ import java.util.concurrent.TimeUnit;
@Service
public class ContentServiceImpl implements ContentService {
@Autowired
private RestHighLevelClient restHighLevelClient;
@Autowired
private HtmlParseUtil htmlParseUtil;
@Autowired
private RestHighLevelClient restHighLevelClient;
//1.解析数据放入es索引中
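/**
 * Parses content for the given keywords via {@code HtmlParseUtil.parseJD} and adds each
 * result, serialized as JSON, to an Elasticsearch bulk index request.
 *
 * @param keywords search keywords handed to the HTML parser; also used when building each index request
 * @return presumably whether the bulk request succeeded; the end of the method is not visible here, TODO confirm
 * @throws Exception propagated from the HTML parsing step or from the bulk call to Elasticsearch
 */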
@Override
public Boolean parseContent(String keywords) throws Exception {
List<Content> contents = htmlParseUtil.parseJD(keywords);
//把查询出来的数据放入es里面
BulkRequest bulkRequest = new BulkRequest();
bulkRequest.timeout("2m");
bulkRequest.timeout(TimeValue.timeValueMinutes(120));
for (int i = 0; i < contents.size(); i++) {
System.out.println(contents.get(i));
bulkRequest.add(
new IndexRequest("jd_goods_2", keywords + "")
new IndexRequest("jd_goods" + keywords, keywords + "")
.source(JSON.toJSONString(contents.get(i)), XContentType.JSON));
}
BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
......
server:
port: 40100
port: 8888
spring:
application:
......
package es_02_index;
import org.elasticsearch.action.admin.indices.close.CloseIndexRequest;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.indices.CloseIndexRequest;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
......
......@@ -68,7 +68,8 @@ public class TestAggs_05 {
searchSourceBuilder.size(0);
searchSourceBuilder.query(QueryBuilders.matchAllQuery());
DateHistogramAggregationBuilder dateHistogramAggregationBuilder = AggregationBuilders.dateHistogram("date_histogram").field("sold_date").calendarInterval(DateHistogramInterval.QUARTER)
.format("yyyy-MM-dd").minDocCount(0).extendedBounds(new ExtendedBounds("2019-01-01", "2020-12-31"));
.format("yyyy-MM-dd").minDocCount(0)
.extendedBounds(new ExtendedBounds("2019-01-01", "2020-12-31"));
SumAggregationBuilder sumAggregationBuilder = AggregationBuilders.sum("income").field("price");
dateHistogramAggregationBuilder.subAggregation(sumAggregationBuilder);
searchSourceBuilder.aggregation(dateHistogramAggregationBuilder);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册