fix:爬取数据

上级 72d39140
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
<dependency> <dependency>
<groupId>org.elasticsearch.client</groupId> <groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId> <artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>7.12.0</version> <version>7.3.0</version>
<exclusions> <exclusions>
<exclusion> <exclusion>
<groupId>org.elasticsearch</groupId> <groupId>org.elasticsearch</groupId>
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
<dependency> <dependency>
<groupId>org.elasticsearch</groupId> <groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId> <artifactId>elasticsearch</artifactId>
<version>7.12.0</version> <version>7.3.0</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.springframework.boot</groupId> <groupId>org.springframework.boot</groupId>
......
...@@ -5,6 +5,7 @@ import org.springframework.beans.factory.annotation.Autowired; ...@@ -5,6 +5,7 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller; import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody; import org.springframework.web.bind.annotation.ResponseBody;
import java.io.IOException; import java.io.IOException;
...@@ -24,9 +25,9 @@ public class ContentController { ...@@ -24,9 +25,9 @@ public class ContentController {
* @return * @return
* @throws Exception * @throws Exception
*/ */
@GetMapping("/parse/{keyword}") @GetMapping("/parse")
@ResponseBody @ResponseBody
public Boolean parse(@PathVariable String keyword) throws Exception { public Boolean parse(@RequestParam(value = "keyword") String keyword) throws Exception {
return contentService.parseContent(keyword); return contentService.parseContent(keyword);
} }
......
...@@ -29,22 +29,28 @@ import java.util.concurrent.TimeUnit; ...@@ -29,22 +29,28 @@ import java.util.concurrent.TimeUnit;
@Service @Service
public class ContentServiceImpl implements ContentService { public class ContentServiceImpl implements ContentService {
@Autowired
private RestHighLevelClient restHighLevelClient;
@Autowired @Autowired
private HtmlParseUtil htmlParseUtil; private HtmlParseUtil htmlParseUtil;
@Autowired
private RestHighLevelClient restHighLevelClient;
//1.解析数据放入es索引中 /**
* 解析数据放入es索引中
*
* @param keywords
* @return
* @throws Exception
*/
@Override @Override
public Boolean parseContent(String keywords) throws Exception { public Boolean parseContent(String keywords) throws Exception {
List<Content> contents = htmlParseUtil.parseJD(keywords); List<Content> contents = htmlParseUtil.parseJD(keywords);
//把查询出来的数据放入es里面 //把查询出来的数据放入es里面
BulkRequest bulkRequest = new BulkRequest(); BulkRequest bulkRequest = new BulkRequest();
bulkRequest.timeout("2m"); bulkRequest.timeout(TimeValue.timeValueMinutes(120));
for (int i = 0; i < contents.size(); i++) { for (int i = 0; i < contents.size(); i++) {
System.out.println(contents.get(i)); System.out.println(contents.get(i));
bulkRequest.add( bulkRequest.add(
new IndexRequest("jd_goods_2", keywords + "") new IndexRequest("jd_goods" + keywords, keywords + "")
.source(JSON.toJSONString(contents.get(i)), XContentType.JSON)); .source(JSON.toJSONString(contents.get(i)), XContentType.JSON));
} }
BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT); BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
......
server: server:
port: 40100 port: 8888
spring: spring:
application: application:
......
package es_02_index; package es_02_index;
import org.elasticsearch.action.admin.indices.close.CloseIndexRequest;
import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.RequestOptions; import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient; import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.indices.CloseIndexRequest;
import org.junit.Test; import org.junit.Test;
import org.junit.runner.RunWith; import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
......
...@@ -68,7 +68,8 @@ public class TestAggs_05 { ...@@ -68,7 +68,8 @@ public class TestAggs_05 {
searchSourceBuilder.size(0); searchSourceBuilder.size(0);
searchSourceBuilder.query(QueryBuilders.matchAllQuery()); searchSourceBuilder.query(QueryBuilders.matchAllQuery());
DateHistogramAggregationBuilder dateHistogramAggregationBuilder = AggregationBuilders.dateHistogram("date_histogram").field("sold_date").calendarInterval(DateHistogramInterval.QUARTER) DateHistogramAggregationBuilder dateHistogramAggregationBuilder = AggregationBuilders.dateHistogram("date_histogram").field("sold_date").calendarInterval(DateHistogramInterval.QUARTER)
.format("yyyy-MM-dd").minDocCount(0).extendedBounds(new ExtendedBounds("2019-01-01", "2020-12-31")); .format("yyyy-MM-dd").minDocCount(0)
.extendedBounds(new ExtendedBounds("2019-01-01", "2020-12-31"));
SumAggregationBuilder sumAggregationBuilder = AggregationBuilders.sum("income").field("price"); SumAggregationBuilder sumAggregationBuilder = AggregationBuilders.sum("income").field("price");
dateHistogramAggregationBuilder.subAggregation(sumAggregationBuilder); dateHistogramAggregationBuilder.subAggregation(sumAggregationBuilder);
searchSourceBuilder.aggregation(dateHistogramAggregationBuilder); searchSourceBuilder.aggregation(dateHistogramAggregationBuilder);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册