提交 569060f7 编写于 作者: M monomania

1.优化爬虫线程带出的bug

上级 48da80a9
......@@ -43,9 +43,9 @@ func (this *AsiaLastNewProcesser) Setup(temp *AsiaLastNewProcesser) {
func (this *AsiaLastNewProcesser) Startup() {
var processer *AsiaLastNewProcesser
for i, v := range this.MatchLastList {
var processer *AsiaLastNewProcesser
if i%10000 == 0 { //10000个比赛一个spider,一个赛季大概有30万场比赛,最多30spider
processer = GetAsiaLastNewProcesser()
processer.Setup(this)
......
......@@ -48,9 +48,9 @@ func (this *BaseFaceProcesser) Setup(temp *BaseFaceProcesser) {
func (this *BaseFaceProcesser) Startup() {
var processer *BaseFaceProcesser
for i, v := range this.MatchLastList {
var processer *BaseFaceProcesser
if i%10000 == 0 { //10000个比赛一个spider,一个赛季大概有30万场比赛,最多30spider
processer = GetBaseFaceProcesser()
processer.Setup(this)
......
......@@ -49,9 +49,9 @@ func (this *EuroLastProcesser) Setup(temp *EuroLastProcesser) {
func (this *EuroLastProcesser) Startup() {
var processer *EuroLastProcesser
for i, v := range this.MatchLastList {
var processer *EuroLastProcesser
if i%10000 == 0 { //10000个比赛一个spider,一个赛季大概有30万场比赛,最多30spider
processer = GetEuroLastProcesser()
processer.Setup(this)
......
......@@ -48,9 +48,9 @@ func (this *EuroTrackProcesser) Setup(temp *EuroTrackProcesser) {
func (this *EuroTrackProcesser) Startup() {
var processer *EuroTrackProcesser
for i, v := range this.MatchLastList {
var processer *EuroTrackProcesser
if i%10000 == 0 { //10000个比赛一个spider,一个赛季大概有30万场比赛,最多30spider
processer = GetEuroTrackProcesser()
processer.Setup(this)
......
......@@ -5,6 +5,7 @@ import (
"github.com/hu17889/go_spider/core/common/page"
"github.com/hu17889/go_spider/core/pipeline"
"github.com/hu17889/go_spider/core/spider"
"strconv"
"strings"
"tesou.io/platform/foot-parent/foot-api/common/base"
entity2 "tesou.io/platform/foot-parent/foot-api/module/elem/pojo"
......@@ -17,9 +18,9 @@ type LeagueProcesser struct {
service2.LeagueService
service2.CompService
//联赛数据
league_list []*entity2.League
sUrl_Id map[string]string
sUrl_Name map[string]string
league_list []*entity2.League
sUrl_Id map[string]string
sUrl_Name map[string]string
}
func GetLeagueProcesser() *LeagueProcesser {
......@@ -35,29 +36,40 @@ func (this *LeagueProcesser) Init() {
this.sUrl_Name = make(map[string]string)
}
// Setup is the hook for carrying parameter values from temp over to the
// receiver. The body is currently empty, so calling it changes nothing on
// either processer.
func (this *LeagueProcesser) Setup(temp *LeagueProcesser) {
// Set parameter values (nothing to set yet).
}
// Startup resets the receiver's league_list, sUrl_Id and sUrl_Name, loads the
// page at sid_stat_url through GetDocument, and walks every anchor whose href
// contains "sid". For each anchor it records the id (the text after "sid=")
// and the trimmed link text keyed by WIN007_BASE_URL+href, then builds,
// configures and runs a spider for that URL.
//
// NOTE(review): this span looks like it interleaves pre-change and
// post-change lines of a diff (spider configuration appears both inside the
// callback and after it) — confirm against the real file before relying on it.
func (this *LeagueProcesser) Startup() {
// Initialize parameter values.
this.league_list = make([]*entity2.League, 0)
this.sUrl_Id = make(map[string]string)
this.sUrl_Name = make(map[string]string)
newSpider := spider.NewSpider(this, "LeagueProcesser")
// sid data
sid_stat_url := "http://m.win007.com/info.htm#section0";
// The error returned by GetDocument is discarded and document is used below
// without a nil check.
document, _ := GetDocument(sid_stat_url)
var processer *LeagueProcesser
document.Find("a[href*='sid']").Each(func(i int, selection *goquery.Selection) {
sUrl, _ := selection.Attr("href")
// Indexing [1] panics when the href does not contain "sid="; the selector
// above only requires the substring "sid".
sId := strings.Split(sUrl, "sid=")[1]
sName := strings.TrimSpace(selection.Text())
base.Log.Info("sId:", sId, ",sName:", sName, ",sUrl:"+sUrl)
this.sUrl_Id[win007.WIN007_BASE_URL+sUrl] = sId
this.sUrl_Name[win007.WIN007_BASE_URL+sUrl] = sName
// This guard runs after the split above, so it cannot protect that index
// expression.
if len(sUrl) <= 0 {
return
}
if i%100 == 0 { // one spider per 100 leagues; there are 1000+ leagues in total
processer = GetLeagueProcesser()
processer.Setup(this)
}
// A fresh spider is created for every anchor; only the processer is renewed
// every 100 entries. This declaration shadows the outer newSpider.
newSpider := spider.NewSpider(processer, "LeagueProcesser"+strconv.Itoa(i))
processer.sUrl_Id[win007.WIN007_BASE_URL+sUrl] = sId
processer.sUrl_Name[win007.WIN007_BASE_URL+sUrl] = sName
newSpider = newSpider.AddUrl(win007.WIN007_BASE_URL+sUrl, "html")
newSpider.SetDownloader(down.NewMWin007Downloader())
newSpider = newSpider.AddPipeline(pipeline.NewPipelineConsole())
newSpider.SetSleepTime("rand", 1000, 20000)
newSpider.SetThreadnum(1).Run()
})
// These calls act on the spider created before the Each loop; in this view no
// URL is ever added to it, because AddUrl is applied to the shadowing
// variable inside the callback.
newSpider.SetDownloader(down.NewMWin007Downloader())
newSpider = newSpider.AddPipeline(pipeline.NewPipelineConsole())
newSpider.SetSleepTime("rand",1000,20000)
newSpider.SetThreadnum(1).Run()
}
func (this *LeagueProcesser) Process(p *page.Page) {
......@@ -81,7 +93,7 @@ func (this *LeagueProcesser) Process(p *page.Page) {
lUrl, _ := selection.Attr("href")
l_arr := strings.Split(lUrl, "/")
lId_suffix := l_arr[len(l_arr)-1]
lId := strings.ReplaceAll(lId_suffix,".htm","")
lId := strings.ReplaceAll(lId_suffix, ".htm", "")
lName := strings.TrimSpace(selection.Text())
base.Log.Info("lId:", lId, ",lName:", lName, ",lUrl:"+lUrl)
league := new(entity2.League)
......@@ -90,14 +102,14 @@ func (this *LeagueProcesser) Process(p *page.Page) {
league.Sid = sId
league.SName = sName
league.ShortUrl = lUrl
if strings.Contains(lUrl,"Cup"){
if strings.Contains(lUrl, "Cup") {
league.Cup = true
}
if strings.Contains(lUrl,"-"){
if strings.Contains(lUrl, "-") {
league.SeasonCross = true
}
this.league_list = append(this.league_list,league)
this.league_list = append(this.league_list, league)
})
}
......@@ -112,7 +124,7 @@ func (this *LeagueProcesser) Finish() {
}
exists := this.LeagueService.ExistById(v.Id)
if exists {
league_modify_list_slice = append(league_modify_list_slice,v)
league_modify_list_slice = append(league_modify_list_slice, v)
continue
}
league_list_slice = append(league_list_slice, v)
......
......@@ -48,6 +48,7 @@ func (this *LeagueSeasonProcesser) Startup() {
leaguesList := make([]*pojo.League, 0)
this.LeagueService.FindAll(&leaguesList)
//2.配置要抓取的路径
var processer *LeagueSeasonProcesser
//index := 0
for i, v := range leaguesList {
//先不处理杯赛....
......@@ -58,7 +59,6 @@ func (this *LeagueSeasonProcesser) Startup() {
//if index > 10{
// break
//}
var processer *LeagueSeasonProcesser
if i % 100 == 0 {//100个联赛一个spider,总数1000多个联赛
processer = GetLeagueSeasonProcesser()
processer.Setup(this)
......
......@@ -60,9 +60,9 @@ func (this *MatchHisProcesser) Startup() {
//1.获取所有的联赛赛季信息
seasonList := this.LeagueSeasonService.FindBySeason(this.Season)
//2.配置要抓取的路径
var processer *MatchHisProcesser
for i, v := range seasonList {
var processer *MatchHisProcesser
if i%10 == 0 { //10个联赛一个spider,总数1000多个联赛,最多100spider
processer = GetMatchHisProcesser()
processer.Setup(this)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册