提交 e9d489ce 编写于 作者: M monomania

1. 修复爬虫线程引发的 bug

上级 97d249cd
......@@ -44,13 +44,14 @@ func (this *AsiaLastNewProcesser) Setup(temp *AsiaLastNewProcesser) {
func (this *AsiaLastNewProcesser) Startup() {
var processer *AsiaLastNewProcesser
var newSpider *spider.Spider
for i, v := range this.MatchLastList {
if i%10000 == 0 { //10000个比赛一个spider,一个赛季大概有30万场比赛,最多30spider
processer = GetAsiaLastNewProcesser()
processer.Setup(this)
newSpider = spider.NewSpider(processer, "AsiaLastNewProcesser"+strconv.Itoa(i))
}
newSpider := spider.NewSpider(processer, "AsiaLastNewProcesser"+strconv.Itoa(i))
temp_flag := v.Ext[win007.MODULE_FLAG]
bytes, _ := json.Marshal(temp_flag)
......@@ -62,10 +63,12 @@ func (this *AsiaLastNewProcesser) Startup() {
url := strings.Replace(win007.WIN007_ASIAODD_NEW_URL_PATTERN, "${matchId}", win007_id, 1)
newSpider = newSpider.AddUrl(url, "json")
newSpider.SetDownloader(down.NewMAsiaLastApiDownloader())
newSpider = newSpider.AddPipeline(pipeline.NewPipelineConsole())
newSpider.SetSleepTime("rand", 1000, 20000)
newSpider.SetThreadnum(1).Run()
if i%10000 == 0 { //10000个比赛一个spider,一个赛季大概有30万场比赛,最多30spider
newSpider.SetDownloader(down.NewMAsiaLastApiDownloader())
newSpider = newSpider.AddPipeline(pipeline.NewPipelineConsole())
newSpider.SetSleepTime("rand", 1000, 20000)
newSpider.SetThreadnum(1).Run()
}
}
}
......
......@@ -49,13 +49,14 @@ func (this *BaseFaceProcesser) Setup(temp *BaseFaceProcesser) {
func (this *BaseFaceProcesser) Startup() {
var processer *BaseFaceProcesser
var newSpider *spider.Spider
for i, v := range this.MatchLastList {
if i%10000 == 0 { //10000个比赛一个spider,一个赛季大概有30万场比赛,最多30spider
processer = GetBaseFaceProcesser()
processer.Setup(this)
newSpider = spider.NewSpider(processer, "BaseFaceProcesser"+strconv.Itoa(i))
}
newSpider := spider.NewSpider(processer, "BaseFaceProcesser"+strconv.Itoa(i))
temp_flag := v.Ext[win007.MODULE_FLAG]
bytes, _ := json.Marshal(temp_flag)
......@@ -67,11 +68,13 @@ func (this *BaseFaceProcesser) Startup() {
url := strings.Replace(win007.WIN007_BASE_FACE_URL_PATTERN, "${matchId}", win007_id, 1)
newSpider = newSpider.AddUrl(url, "html")
if i%10000 == 0 { //10000个比赛一个spider,一个赛季大概有30万场比赛,最多30spider
newSpider.SetDownloader(down.NewMWin007Downloader())
newSpider = newSpider.AddPipeline(pipeline.NewPipelineConsole())
newSpider.SetSleepTime("rand", 1000, 20000)
newSpider.SetThreadnum(1).Run()
}
newSpider.SetDownloader(down.NewMWin007Downloader())
newSpider = newSpider.AddPipeline(pipeline.NewPipelineConsole())
newSpider.SetSleepTime("rand", 1000, 20000)
newSpider.SetThreadnum(1).Run()
}
}
......
......@@ -50,13 +50,14 @@ func (this *EuroLastProcesser) Setup(temp *EuroLastProcesser) {
func (this *EuroLastProcesser) Startup() {
var processer *EuroLastProcesser
var newSpider *spider.Spider
for i, v := range this.MatchLastList {
if i%10000 == 0 { //10000个比赛一个spider,一个赛季大概有30万场比赛,最多30spider
processer = GetEuroLastProcesser()
processer.Setup(this)
newSpider = spider.NewSpider(processer, "EuroLastProcesser"+strconv.Itoa(i))
}
newSpider := spider.NewSpider(processer, "EuroLastProcesser"+strconv.Itoa(i))
temp_flag := v.Ext[win007.MODULE_FLAG]
bytes, _ := json.Marshal(temp_flag)
......@@ -68,10 +69,12 @@ func (this *EuroLastProcesser) Startup() {
url := strings.Replace(win007.WIN007_EUROODD_URL_PATTERN, "${matchId}", win007_id, 1)
newSpider = newSpider.AddUrl(url, "html")
newSpider.SetDownloader(down.NewMWin007Downloader())
newSpider = newSpider.AddPipeline(pipeline.NewPipelineConsole())
newSpider.SetSleepTime("rand", 100, 2000)
newSpider.SetThreadnum(1).Run()
if i%10000 == 0 { //10000个比赛一个spider,一个赛季大概有30万场比赛,最多30spider
newSpider.SetDownloader(down.NewMWin007Downloader())
newSpider = newSpider.AddPipeline(pipeline.NewPipelineConsole())
newSpider.SetSleepTime("rand", 100, 2000)
newSpider.SetThreadnum(1).Run()
}
}
}
......
......@@ -49,13 +49,14 @@ func (this *EuroTrackProcesser) Setup(temp *EuroTrackProcesser) {
func (this *EuroTrackProcesser) Startup() {
var processer *EuroTrackProcesser
var newSpider *spider.Spider
for i, v := range this.MatchLastList {
if i%10000 == 0 { //10000个比赛一个spider,一个赛季大概有30万场比赛,最多30spider
processer = GetEuroTrackProcesser()
processer.Setup(this)
newSpider = spider.NewSpider(processer, "EuroTrackProcesser"+strconv.Itoa(i))
}
newSpider := spider.NewSpider(processer, "EuroTrackProcesser"+strconv.Itoa(i))
temp_flag := v.Ext[win007.MODULE_FLAG]
bytes, _ := json.Marshal(temp_flag)
......@@ -69,11 +70,12 @@ func (this *EuroTrackProcesser) Startup() {
url := strings.Replace(base_url, "${cId}", v, 1)
newSpider = newSpider.AddUrl(url, "html")
}
newSpider.SetDownloader(down.NewMWin007Downloader())
newSpider = newSpider.AddPipeline(pipeline.NewPipelineConsole())
newSpider.SetSleepTime("rand", 1000, 20000)
newSpider.SetThreadnum(1).Run()
if i%10000 == 0 { //10000个比赛一个spider,一个赛季大概有30万场比赛,最多30spider
newSpider.SetDownloader(down.NewMWin007Downloader())
newSpider = newSpider.AddPipeline(pipeline.NewPipelineConsole())
newSpider.SetSleepTime("rand", 1000, 20000)
newSpider.SetThreadnum(1).Run()
}
}
}
......
......@@ -46,6 +46,7 @@ func (this *LeagueProcesser) Startup() {
document, _ := GetDocument(sid_stat_url)
var processer *LeagueProcesser
var newSpider *spider.Spider
document.Find("a[href*='sid']").Each(func(i int, selection *goquery.Selection) {
sUrl, _ := selection.Attr("href")
sId := strings.Split(sUrl, "sid=")[1]
......@@ -58,16 +59,18 @@ func (this *LeagueProcesser) Startup() {
if i%10 == 0 { //10个联赛一个spider,总数1000多个联赛,最多100spider
processer = GetLeagueProcesser()
processer.Setup(this)
newSpider = spider.NewSpider(processer, "LeagueProcesser"+strconv.Itoa(i))
}
newSpider := spider.NewSpider(processer, "LeagueProcesser"+strconv.Itoa(i))
processer.sUrl_Id[win007.WIN007_BASE_URL+sUrl] = sId
processer.sUrl_Name[win007.WIN007_BASE_URL+sUrl] = sName
newSpider = newSpider.AddUrl(win007.WIN007_BASE_URL+sUrl, "html")
newSpider.SetDownloader(down.NewMWin007Downloader())
newSpider = newSpider.AddPipeline(pipeline.NewPipelineConsole())
newSpider.SetSleepTime("rand", 1000, 20000)
newSpider.SetThreadnum(1).Run()
if i%10 == 0 { //10个联赛一个spider,总数1000多个联赛,最多100spider
newSpider.SetDownloader(down.NewMWin007Downloader())
newSpider = newSpider.AddPipeline(pipeline.NewPipelineConsole())
newSpider.SetSleepTime("rand", 1000, 20000)
newSpider.SetThreadnum(1).Run()
}
})
}
......
......@@ -49,6 +49,7 @@ func (this *LeagueSeasonProcesser) Startup() {
this.LeagueService.FindAll(&leaguesList)
//2.配置要抓取的路径
var processer *LeagueSeasonProcesser
var newSpider *spider.Spider
//index := 0
for i, v := range leaguesList {
//先不处理杯赛....
......@@ -62,8 +63,8 @@ func (this *LeagueSeasonProcesser) Startup() {
if i % 10 == 0 {//10个联赛一个spider,总数1000多个联赛,最多100spider
processer = GetLeagueSeasonProcesser()
processer.Setup(this)
newSpider = spider.NewSpider(processer, "LeagueSeasonProcesser"+strconv.Itoa(i))
}
newSpider := spider.NewSpider(processer, "LeagueSeasonProcesser"+strconv.Itoa(i))
url := win007.WIN007_MATCH_HIS_PATTERN
if v.SeasonCross {
......@@ -77,11 +78,12 @@ func (this *LeagueSeasonProcesser) Startup() {
processer.sUrl_leagueId[url] = v.Id
newSpider = newSpider.AddUrl(url, "html")
newSpider.SetDownloader(down.NewMWin007Downloader())
newSpider = newSpider.AddPipeline(pipeline.NewPipelineConsole())
newSpider.SetSleepTime("rand", 1000, 20000)
newSpider.SetThreadnum(1).Run()
if i % 10 == 0 {//10个联赛一个spider,总数1000多个联赛,最多100spider
newSpider.SetDownloader(down.NewMWin007Downloader())
newSpider = newSpider.AddPipeline(pipeline.NewPipelineConsole())
newSpider.SetSleepTime("rand", 1000, 20000)
newSpider.SetThreadnum(1).Run()
}
}
}
......
......@@ -61,13 +61,14 @@ func (this *MatchHisProcesser) Startup() {
seasonList := this.LeagueSeasonService.FindBySeason(this.Season)
//2.配置要抓取的路径
var processer *MatchHisProcesser
var newSpider *spider.Spider
for i, v := range seasonList {
if i%10 == 0 { //10个联赛一个spider,总数1000多个联赛,最多100spider
processer = GetMatchHisProcesser()
processer.Setup(this)
newSpider = spider.NewSpider(processer, "MatchHisProcesser"+strconv.Itoa(i))
}
newSpider := spider.NewSpider(processer, "MatchHisProcesser"+strconv.Itoa(i))
url := win007.WIN007_MATCH_HIS_PATTERN
url = strings.Replace(url, "${season}", v.Season, 1)
......@@ -81,11 +82,12 @@ func (this *MatchHisProcesser) Startup() {
processer.SUrl_Season[round_url] = v
newSpider = newSpider.AddUrl(round_url, "html")
}
newSpider.SetDownloader(down.NewMWin007Downloader())
newSpider = newSpider.AddPipeline(pipeline.NewPipelineConsole())
newSpider.SetSleepTime("rand", 1000, 20000)
newSpider.SetThreadnum(1).Run()
if i%10 == 0 { //10个联赛一个spider,总数1000多个联赛,最多100spider
newSpider.SetDownloader(down.NewMWin007Downloader())
newSpider = newSpider.AddPipeline(pipeline.NewPipelineConsole())
newSpider.SetSleepTime("rand", 1000, 20000)
newSpider.SetThreadnum(1).Run()
}
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册