提交 72d39140 编写于 作者: 檀越@新空间's avatar 檀越@新空间 🐭

fix:启动

上级 cc0a6119
...@@ -64,6 +64,16 @@ ...@@ -64,6 +64,16 @@
<artifactId>commons-lang3</artifactId> <artifactId>commons-lang3</artifactId>
<version>3.1</version> <version>3.1</version>
</dependency> </dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.83</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.15.3</version>
</dependency>
</dependencies> </dependencies>
<repositories> <repositories>
<repository> <repository>
......
package com.kwan.shuyu.controller;
import com.kwan.shuyu.service.ContentService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.ResponseBody;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
 * Web endpoints that delegate to {@link ContentService}: one triggers crawling a keyword
 * into Elasticsearch, one queries the stored documents page by page.
 */
@Controller
public class ContentController {

    /** Page number applied when the caller passes a non-positive {@code pageNo}. */
    private static final int DEFAULT_PAGE_NO = 1;

    /** Page size applied when the caller passes a non-positive {@code pageSize}. */
    private static final int DEFAULT_PAGE_SIZE = 5;

    @Autowired
    private ContentService contentService;

    /**
     * Crawls JD for the given keyword (title, price, image src) and stores the result in Elasticsearch.
     *
     * @param keyword search keyword taken from the request path
     * @return the value returned by {@link ContentService#parseContent(String)}
     * @throws Exception if crawling or indexing fails
     */
    @GetMapping("/parse/{keyword}")
    @ResponseBody
    public Boolean parse(@PathVariable String keyword) throws Exception {
        return contentService.parseContent(keyword);
    }

    /**
     * Queries Elasticsearch for documents matching the keyword.
     *
     * @param keyword  search keyword
     * @param pageNo   page number; values of 0 or below fall back to 1
     * @param pageSize number of hits per page; values of 0 or below fall back to 5
     * @return the source map of every hit on the requested page
     * @throws IOException if the search request fails
     */
    @GetMapping("/search/{keyword}/{pageNo}/{pageSize}")
    @ResponseBody
    public List<Map<String, Object>> search(@PathVariable("keyword") String keyword,
                                            @PathVariable("pageNo") int pageNo,
                                            @PathVariable("pageSize") int pageSize) throws IOException {
        // Clamp negatives as well as 0: a negative size would otherwise be handed
        // straight to the search builder instead of getting the default.
        if (pageNo <= 0) {
            pageNo = DEFAULT_PAGE_NO;
        }
        if (pageSize <= 0) {
            pageSize = DEFAULT_PAGE_SIZE;
        }
        return contentService.searchPage(keyword, pageNo, pageSize);
    }

    /**
     * Redirects a query-parameter style request to the path-variable based {@code /parse/{keyword}} endpoint.
     *
     * @param keyword keyword bound from the request parameters
     * @return a redirect view name pointing at {@code /parse/<keyword>}
     */
    @GetMapping("/parse/add2es")
    public String test(String keyword) {
        System.out.println(keyword);
        // NOTE(review): keyword is appended as-is; a missing parameter yields "/parse/null" and
        // reserved characters are not escaped here — confirm whether that is acceptable.
        return "redirect:/parse/" + keyword;
    }
}
\ No newline at end of file
...@@ -3,7 +3,7 @@ package com.kwan.shuyu.controller; ...@@ -3,7 +3,7 @@ package com.kwan.shuyu.controller;
import com.kwan.shuyu.domain.CoursePub; import com.kwan.shuyu.domain.CoursePub;
import com.kwan.shuyu.domain.CourseSearchParam; import com.kwan.shuyu.domain.CourseSearchParam;
import com.kwan.shuyu.domain.QueryResponseResult; import com.kwan.shuyu.domain.QueryResponseResult;
import com.kwan.shuyu.service.EsCourseServiceImpl; import com.kwan.shuyu.service.impl.EsCourseServiceImpl;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.PathVariable;
......
package com.kwan.shuyu.domain;
import lombok.Data;
/**
 * One scraped product entry. Accessors, equals/hashCode and toString come from Lombok's {@code @Data}.
 */
@Data
public class Content {
// Product title text.
private String title;
// Image location (the scraper stores the img tag's src attribute here).
private String img;
// Price kept as the raw text scraped from the page, not as a number.
private String price;
}
package com.kwan.shuyu.service;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
 * Service contract for loading scraped content into the search index and querying it back.
 */
public interface ContentService {
/**
* Parses the content for the given keywords and stores it.
*
* @param keywords search keywords to parse content for
* @return {@code true} when the parsed content was stored successfully, {@code false} otherwise
* @throws Exception if parsing or storing fails
*/
Boolean parseContent(String keywords) throws Exception;
/**
* Searches the stored data, one page at a time.
*
* @param keyword  search keyword
* @param pageNo   page number to return
* @param pageSize number of entries per page
* @return one map per matching document
* @throws IOException if the search request fails
*/
List<Map<String, Object>> searchPage(String keyword, int pageNo, int pageSize) throws IOException;
}
\ No newline at end of file
package com.kwan.shuyu.service.impl; package com.kwan.shuyu.service;
import com.kwan.shuyu.domain.CoursePub; import com.kwan.shuyu.domain.CoursePub;
import com.kwan.shuyu.domain.CourseSearchParam; import com.kwan.shuyu.domain.CourseSearchParam;
......
package com.kwan.shuyu.service.impl;
import com.alibaba.fastjson.JSON;
import com.kwan.shuyu.domain.Content;
import com.kwan.shuyu.service.ContentService;
import com.kwan.shuyu.util.HtmlParseUtil;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
 * {@link ContentService} implementation backed by the Elasticsearch high-level REST client:
 * scraped items are bulk-indexed, and searches are term queries on the {@code title} field.
 */
@Service
public class ContentServiceImpl implements ContentService {

    @Autowired
    private RestHighLevelClient restHighLevelClient;

    @Autowired
    private HtmlParseUtil htmlParseUtil;

    /**
     * Scrapes the items for the given keywords and bulk-indexes them into Elasticsearch.
     *
     * @param keywords search keywords handed to the scraper; also used as the document type
     * @return {@code true} if every item was indexed without failure; {@code false} if the
     *         scraper produced nothing or the bulk response reports failures
     * @throws Exception if scraping or the bulk request fails
     */
    @Override
    public Boolean parseContent(String keywords) throws Exception {
        List<Content> contents = htmlParseUtil.parseJD(keywords);
        // Nothing scraped: do not send a bulk request with no actions (the client is
        // expected to reject it during request validation); report "nothing stored" instead.
        if (contents == null || contents.isEmpty()) {
            return false;
        }

        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("2m");
        for (Content content : contents) {
            System.out.println(content);
            // NOTE(review): documents are written to "jd_goods_2" while searchPage reads
            // "jd_goods" — confirm the two index names are meant to differ.
            bulkRequest.add(
                    new IndexRequest("jd_goods_2", keywords + "")
                            .source(JSON.toJSONString(content), XContentType.JSON));
        }
        BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !bulk.hasFailures();
    }

    /**
     * Runs a paged term query on the {@code title} field.
     *
     * @param keyword  term to match against {@code title}
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the source map of each hit on the requested page
     * @throws IOException if the search request fails
     */
    @Override
    public List<Map<String, Object>> searchPage(String keyword, int pageNo, int pageSize) throws IOException {
        if (pageNo <= 1) {
            pageNo = 1;
        }

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        // from() takes a document offset, not a page number: page 1 starts at offset 0,
        // page 2 at pageSize, and so on.
        sourceBuilder.from((pageNo - 1) * pageSize);
        sourceBuilder.size(pageSize);

        // Exact term match on the title field.
        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyword);
        sourceBuilder.query(termQueryBuilder);
        sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));

        SearchRequest searchRequest = new SearchRequest("jd_goods");
        searchRequest.source(sourceBuilder);
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        // Flatten the hits into plain source maps for the caller.
        List<Map<String, Object>> list = new ArrayList<>();
        for (SearchHit hit : searchResponse.getHits().getHits()) {
            list.add(hit.getSourceAsMap());
        }
        return list;
    }
}
package com.kwan.shuyu.service; package com.kwan.shuyu.service.impl;
import com.kwan.shuyu.domain.*; import com.kwan.shuyu.domain.*;
import com.kwan.shuyu.service.impl.EsCourseService; import com.kwan.shuyu.service.EsCourseService;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.SearchResponse;
......
package com.kwan.shuyu.util;
import com.kwan.shuyu.domain.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
/**
 * Crawls JD search result pages and parses them into {@link Content} items.
 *
 * @author : qinyingjie
 * @version : 2.2.0
 * @date : 2023/5/14 01:33
 */
@Component
public class HtmlParseUtil {

    /**
     * Fetches the JD search page for the given keywords and extracts one {@link Content}
     * per {@code li} element found under the {@code J_goodsList} element.
     *
     * @param keywords search keywords; URL-encoded before being placed in the query string
     * @return the parsed items, or an empty list when the page has no {@code J_goodsList} element
     * @throws Exception if the page cannot be fetched or parsed
     */
    public List<Content> parseJD(String keywords) throws Exception {
        // Encode the keyword so spaces, '&', '#', '+' or non-ASCII text cannot corrupt the
        // query string. String.valueOf keeps the previous rendering of a null keyword ("null").
        String encodedKeywords = URLEncoder.encode(String.valueOf(keywords), "UTF-8");
        String url = "https://search.jd.com/Search?keyword=" + encodedKeywords;

        // Jsoup returns a Document comparable to the browser's DOM document; 30 s timeout.
        Document document = Jsoup.parse(new URL(url), 30000);

        List<Content> list = new ArrayList<>();
        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            // The response did not contain the result container; nothing to parse.
            return list;
        }

        // Each li element under the container is treated as one product entry.
        Elements goodsItems = goodsList.getElementsByTag("li");
        for (Element item : goodsItems) {
            String img = item.getElementsByTag("img").eq(0).attr("src");
            String price = item.getElementsByClass("p-price").eq(0).text();
            String title = item.getElementsByClass("p-name").eq(0).text();

            Content content = new Content();
            content.setTitle(title);
            content.setImg(img);
            content.setPrice(price);
            list.add(content);
        }
        return list;
    }
}
\ No newline at end of file
...@@ -6,6 +6,6 @@ spring: ...@@ -6,6 +6,6 @@ spring:
name: search-service name: search-service
kwan: kwan:
elasticsearch: elasticsearch:
hostlist: 47.119.160.231:9200 #多个节点用逗号分隔 hostlist: 47.119.162.180:9200 #多个节点用逗号分隔
course: #课程字段 course: #课程字段
source_field: id,name,grade,mt,st,charge,valid,pic,qq,price,price_old,status,studymodel,teachmode,expires,pub_time,start_time,end_time source_field: id,name,grade,mt,st,charge,valid,pic,qq,price,price_old,status,studymodel,teachmode,expires,pub_time,start_time,end_time
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册