提交 c5b91b5d 编写于 作者: M monomania

1.优化爬虫线程

上级 ce7dec46
......@@ -27,31 +27,47 @@ type AsiaLastNewProcesser struct {
}
// GetAsiaLastNewProcesser returns a new AsiaLastNewProcesser on which Init has
// already been called, so the returned value is ready to have entries written
// to its Win007idMatchidMap.
func GetAsiaLastNewProcesser() *AsiaLastNewProcesser {
	processer := &AsiaLastNewProcesser{}
	processer.Init()
	return processer
}
func (this *AsiaLastNewProcesser) Startup() {
// Init initialises the processer's parameter values by assigning a fresh,
// empty Win007idMatchidMap.
func (this *AsiaLastNewProcesser) Init() {
	this.Win007idMatchidMap = make(map[string]string)
}
// Setup is the place where parameter values are set on this processer.
// The body is currently empty, so temp is not read.
func (this *AsiaLastNewProcesser) Setup(temp *AsiaLastNewProcesser) {
	// Set parameter values.
}
func (this *AsiaLastNewProcesser) Startup() {
for i, v := range this.MatchLastList {
newSpider := spider.NewSpider(this, "AsiaLastNewProcesser")
var processer *AsiaLastNewProcesser
if i%10000 == 0 { //10000个比赛一个spider,一个赛季大概有30万场比赛,最多30条线程
processer = GetAsiaLastNewProcesser()
processer.Setup(this)
}
newSpider := spider.NewSpider(processer, "AsiaLastNewProcesser"+strconv.Itoa(i))
for _, v := range this.MatchLastList {
i := v.Ext[win007.MODULE_FLAG]
bytes, _ := json.Marshal(i)
temp_flag := v.Ext[win007.MODULE_FLAG]
bytes, _ := json.Marshal(temp_flag)
matchExt := new(pojo.MatchExt)
json.Unmarshal(bytes, matchExt)
win007_id := matchExt.Sid
this.Win007idMatchidMap[win007_id] = v.Id
processer.Win007idMatchidMap[win007_id] = v.Id
url := strings.Replace(win007.WIN007_ASIAODD_NEW_URL_PATTERN, "${matchId}", win007_id, 1)
newSpider = newSpider.AddUrl(url, "json")
newSpider.SetDownloader(down.NewMAsiaLastApiDownloader())
newSpider = newSpider.AddPipeline(pipeline.NewPipelineConsole())
newSpider.SetSleepTime("rand", 1000, 20000)
newSpider.SetThreadnum(1).Run()
}
newSpider.SetDownloader(down.NewMAsiaLastApiDownloader())
newSpider = newSpider.AddPipeline(pipeline.NewPipelineConsole())
newSpider.SetSleepTime("rand",100,2000)
newSpider.SetThreadnum(1).Run()
}
func (this *AsiaLastNewProcesser) Process(p *page.Page) {
......@@ -101,7 +117,7 @@ func (this *AsiaLastNewProcesser) Process(p *page.Page) {
}
last.OddDate = time.Unix(tempMt, 0).Format("2006-01-02 15:04:05")
}
last_temp_id,last_exists := this.AsiaLastService.Exist(last)
last_temp_id, last_exists := this.AsiaLastService.Exist(last)
if !last_exists {
last_slice = append(last_slice, last)
} else {
......@@ -111,7 +127,7 @@ func (this *AsiaLastNewProcesser) Process(p *page.Page) {
his := new(entity2.AsiaHis)
his.AsiaLast = *last
his_temp_id,his_exists := this.AsiaHisService.Exist(his)
his_temp_id, his_exists := this.AsiaHisService.Exist(his)
if !his_exists {
his_slice = append(his_slice, his)
} else {
......@@ -130,7 +146,7 @@ func (this *AsiaLastNewProcesser) Process(p *page.Page) {
track.Ep3 = last.Ep3
track.ELetBall = last.ELetBall
track_temp_id,track_exists := this.AsiaTrackService.Exist(track)
track_temp_id, track_exists := this.AsiaTrackService.Exist(track)
if !track_exists {
track_slice = append(track_slice, track)
} else {
......
......@@ -32,31 +32,48 @@ type BaseFaceProcesser struct {
}
// GetBaseFaceProcesser returns a new BaseFaceProcesser on which Init has
// already been called, so the returned value is ready to have entries written
// to its Win007idMatchidMap.
func GetBaseFaceProcesser() *BaseFaceProcesser {
	processer := &BaseFaceProcesser{}
	processer.Init()
	return processer
}
func (this *BaseFaceProcesser) Startup() {
// Init initialises the processer's parameter values by assigning a fresh,
// empty Win007idMatchidMap.
func (this *BaseFaceProcesser) Init() {
	this.Win007idMatchidMap = make(map[string]string)
}
// Setup is the place where parameter values are set on this processer.
// The body is currently empty, so temp is not read.
func (this *BaseFaceProcesser) Setup(temp *BaseFaceProcesser) {
	// Set parameter values.
}
func (this *BaseFaceProcesser) Startup() {
newSpider := spider.NewSpider(this, "BaseFaceProcesser")
for i, v := range this.MatchLastList {
for _, v := range this.MatchLastList {
i := v.Ext[win007.MODULE_FLAG]
bytes, _ := json.Marshal(i)
var processer *BaseFaceProcesser
if i%10000 == 0 { //10000个比赛一个spider,一个赛季大概有30万场比赛,最多30条线程
processer = GetBaseFaceProcesser()
processer.Setup(this)
}
newSpider := spider.NewSpider(processer, "BaseFaceProcesser"+strconv.Itoa(i))
temp_flag := v.Ext[win007.MODULE_FLAG]
bytes, _ := json.Marshal(temp_flag)
matchLastExt := new(pojo.MatchExt)
json.Unmarshal(bytes, matchLastExt)
win007_id := matchLastExt.Sid
this.Win007idMatchidMap[win007_id] = v.Id
processer.Win007idMatchidMap[win007_id] = v.Id
url := strings.Replace(win007.WIN007_BASE_FACE_URL_PATTERN, "${matchId}", win007_id, 1)
newSpider = newSpider.AddUrl(url, "html")
newSpider.SetDownloader(down.NewMWin007Downloader())
newSpider = newSpider.AddPipeline(pipeline.NewPipelineConsole())
newSpider.SetSleepTime("rand", 1000, 20000)
newSpider.SetThreadnum(1).Run()
}
newSpider.SetDownloader(down.NewMWin007Downloader())
newSpider = newSpider.AddPipeline(pipeline.NewPipelineConsole())
newSpider.SetSleepTime("rand",100,2000)
newSpider.SetThreadnum(1).Run()
}
func (this *BaseFaceProcesser) Process(p *page.Page) {
......@@ -203,13 +220,13 @@ func (this *BaseFaceProcesser) battle_process(matchId string, p *page.Page) []*p
return data_list_slice
}
base.Log.Info("hdata_str",hdata_str,"URL:", request.Url)
base.Log.Info("hdata_str", hdata_str, "URL:", request.Url)
// 获取script脚本中的,博彩公司信息
temp_arr := strings.Split(hdata_str, "var vsTeamInfo = ")
temp_arr = strings.Split(temp_arr[1], ";")
hdata_str = strings.TrimSpace(temp_arr[0])
if hdata_str == "" {
base.Log.Info("hdata_str:解析失败,",hdata_str,"URL:", request.Url)
base.Log.Info("hdata_str:解析失败,", hdata_str, "URL:", request.Url)
return data_list_slice
}
var hdata_list = make([]*vo.BattleData, 0)
......
......@@ -6,17 +6,17 @@ import (
"github.com/hu17889/go_spider/core/common/page"
"github.com/hu17889/go_spider/core/pipeline"
"github.com/hu17889/go_spider/core/spider"
"tesou.io/platform/foot-parent/foot-api/common/base"
"tesou.io/platform/foot-parent/foot-spider/module/win007/down"
"regexp"
"strconv"
"strings"
"tesou.io/platform/foot-parent/foot-api/common/base"
entity2 "tesou.io/platform/foot-parent/foot-api/module/elem/pojo"
"tesou.io/platform/foot-parent/foot-api/module/match/pojo"
entity3 "tesou.io/platform/foot-parent/foot-api/module/odds/pojo"
"tesou.io/platform/foot-parent/foot-core/module/elem/service"
service2 "tesou.io/platform/foot-parent/foot-core/module/odds/service"
"tesou.io/platform/foot-parent/foot-spider/module/win007"
"tesou.io/platform/foot-parent/foot-spider/module/win007/down"
"tesou.io/platform/foot-parent/foot-spider/module/win007/vo"
)
......@@ -24,38 +24,56 @@ type EuroLastProcesser struct {
service.CompService
service2.EuroLastService
service2.EuroHisService
//入参
MatchLastList []*pojo.MatchLast
//博彩公司对应的win007id
CompWin007Ids []string
MatchLastList []*pojo.MatchLast
Win007idMatchidMap map[string]string
}
// GetEuroLastProcesser returns a new EuroLastProcesser on which Init has
// already been called, so the returned value is ready to have entries written
// to its Win007idMatchidMap.
func GetEuroLastProcesser() *EuroLastProcesser {
	processer := &EuroLastProcesser{}
	processer.Init()
	return processer
}
func (this *EuroLastProcesser) Startup() {
// Init initialises the processer's parameter values by assigning a fresh,
// empty Win007idMatchidMap.
func (this *EuroLastProcesser) Init() {
	this.Win007idMatchidMap = make(map[string]string)
}
newSpider := spider.NewSpider(this, "EuroLastProcesser")
// Setup sets this processer's parameter values from temp: it takes over
// temp's CompWin007Ids. The slice is assigned, not copied, so both
// processers refer to the same underlying elements.
func (this *EuroLastProcesser) Setup(temp *EuroLastProcesser) {
	compIds := temp.CompWin007Ids
	this.CompWin007Ids = compIds
}
for _, v := range this.MatchLastList {
i := v.Ext[win007.MODULE_FLAG]
bytes, _ := json.Marshal(i)
// Startup queues the European-odds URL of every match in MatchLastList and
// runs the spiders that fetch them.
//
// Matches are handled in batches of 10000. Each batch gets its own processer
// (created by GetEuroLastProcesser and configured through Setup) and its own
// spider; the win007 id -> match id mapping is recorded on that batch's
// processer. A batch's spider is started once all of its URLs are queued.
func (this *EuroLastProcesser) Startup() {
	// Matches per spider. The original note estimates roughly 300,000 matches
	// per season, i.e. at most about 30 spiders.
	const batchSize = 10000

	// runSpider applies the shared configuration and starts the crawl.
	// NOTE(review): Run appears to block until the spider's queue is drained,
	// so batches run one after another - confirm against go_spider.
	runSpider := func(s *spider.Spider) {
		s.SetDownloader(down.NewMWin007Downloader())
		s = s.AddPipeline(pipeline.NewPipelineConsole())
		s.SetSleepTime("rand", 100, 2000)
		s.SetThreadnum(1).Run()
	}

	// Both live across iterations: they are replaced only at a batch boundary,
	// so every match in a batch sees a non-nil processer and spider.
	var processer *EuroLastProcesser
	var newSpider *spider.Spider

	for i, v := range this.MatchLastList {
		if i%batchSize == 0 {
			// Start the batch collected so far before opening the next one.
			if newSpider != nil {
				runSpider(newSpider)
			}
			processer = GetEuroLastProcesser()
			processer.Setup(this)
			newSpider = spider.NewSpider(processer, "EuroLastProcesser"+strconv.Itoa(i))
		}

		// The module-specific extension is round-tripped through JSON to decode
		// it into a MatchExt. Marshal/Unmarshal errors are not checked
		// (unchanged behaviour).
		tempFlag := v.Ext[win007.MODULE_FLAG]
		raw, _ := json.Marshal(tempFlag)
		matchExt := new(pojo.MatchExt)
		json.Unmarshal(raw, matchExt)
		win007ID := matchExt.Sid

		processer.Win007idMatchidMap[win007ID] = v.Id

		url := strings.Replace(win007.WIN007_EUROODD_URL_PATTERN, "${matchId}", win007ID, 1)
		newSpider = newSpider.AddUrl(url, "html")
	}

	// Start the final (possibly partial) batch; nil means there were no matches.
	if newSpider != nil {
		runSpider(newSpider)
	}
}
func (this *EuroLastProcesser) Process(p *page.Page) {
......@@ -79,12 +97,12 @@ func (this *EuroLastProcesser) Process(p *page.Page) {
return
}
base.Log.Info("hdata_str",hdata_str,"URL:", request.Url)
base.Log.Info("hdata_str", hdata_str, "URL:", request.Url)
// 获取script脚本中的,博彩公司信息
hdata_str = strings.Replace(hdata_str, ";", "", 1)
hdata_str = strings.Replace(hdata_str, "var hData = ", "", 1)
if hdata_str == "" {
base.Log.Info("hdata_str:解析失败,",hdata_str,"URL:", request.Url)
base.Log.Info("hdata_str:解析失败,", hdata_str, "URL:", request.Url)
return
}
this.hdata_process(request.Url, hdata_str)
......@@ -138,7 +156,7 @@ func (this *EuroLastProcesser) hdata_process(url string, hdata_str string) {
last.Ep1 = v.Rs
last.Ep0 = v.Rg
last_temp_id,last_exists := this.EuroLastService.Exist(last)
last_temp_id, last_exists := this.EuroLastService.Exist(last)
if !last_exists {
last_slice = append(last_slice, last)
} else {
......@@ -154,18 +172,18 @@ func (this *EuroLastProcesser) hdata_process(url string, hdata_str string) {
//历史数据
his_slice := make([]interface{}, 0)
his_update_slice := make([]interface{}, 0)
last_all_slice := append(last_slice,last_update_slice)
for _,e := range last_all_slice {
last_all_slice := append(last_slice, last_update_slice)
for _, e := range last_all_slice {
bytes, _ := json.Marshal(e)
temp := new(entity3.EuroLast)
json.Unmarshal(bytes, temp)
if len(temp.MatchId) <= 0{
if len(temp.MatchId) <= 0 {
continue
}
his := new(entity3.EuroHis)
his.EuroLast = *temp
his_temp_id,his_exists := this.EuroHisService.Exist(his)
his_temp_id, his_exists := this.EuroHisService.Exist(his)
if !his_exists {
his_slice = append(his_slice, his)
} else {
......@@ -176,7 +194,6 @@ func (this *EuroLastProcesser) hdata_process(url string, hdata_str string) {
this.EuroHisService.SaveList(his_slice)
this.EuroHisService.ModifyList(his_update_slice)
}
func (this *EuroLastProcesser) Finish() {
......
......@@ -23,10 +23,10 @@ type EuroTrackProcesser struct {
service2.EuroLastService
service2.EuroHisService
service2.EuroTrackService
//博彩公司对应的win007id
CompWin007Ids []string
//入参
MatchLastList []*pojo.MatchLast
//博彩公司对应的win007id
CompWin007Ids []string
Win007idMatchidMap map[string]string
}
......@@ -41,12 +41,19 @@ func (this *EuroTrackProcesser) Init() {
this.Win007idMatchidMap = map[string]string{}
}
// Setup sets this processer's parameter values from temp: it takes over
// temp's CompWin007Ids. The slice is assigned, not copied, so both
// processers refer to the same underlying elements.
func (this *EuroTrackProcesser) Setup(temp *EuroTrackProcesser) {
	compIds := temp.CompWin007Ids
	this.CompWin007Ids = compIds
}
func (this *EuroTrackProcesser) Startup() {
for i, v := range this.MatchLastList {
var processer *EuroTrackProcesser
if i%10000 == 0 { //10000个比赛一个spider,一个赛季大概有30万场比赛,最多30条线程
processer = GetEuroTrackProcesser()
processer.Setup(this)
}
newSpider := spider.NewSpider(processer, "EuroTrackProcesser"+strconv.Itoa(i))
......@@ -58,7 +65,7 @@ func (this *EuroTrackProcesser) Startup() {
processer.Win007idMatchidMap[win007_id] = v.Id
base_url := strings.Replace(win007.WIN007_EUROODD_BET_URL_PATTERN, "${scheid}", win007_id, 1)
for _, v := range this.CompWin007Ids {
for _, v := range processer.CompWin007Ids {
url := strings.Replace(base_url, "${cId}", v, 1)
newSpider = newSpider.AddUrl(url, "html")
}
......
......@@ -38,6 +38,11 @@ func (this *LeagueSeasonProcesser) Init() {
this.sUrl_leagueId = make(map[string]string)
}
// Setup is the place where parameter values are set on this processer.
// The body is currently empty, so temp is not read.
func (this *LeagueSeasonProcesser) Setup(temp *LeagueSeasonProcesser) {
	// Set parameter values.
}
func (this *LeagueSeasonProcesser) Startup() {
//1.获取所有的联赛
leaguesList := make([]*pojo.League, 0)
......@@ -56,6 +61,7 @@ func (this *LeagueSeasonProcesser) Startup() {
var processer *LeagueSeasonProcesser
if i % 100 == 0 {//100个联赛一个spider,总数1000多个联赛
processer = GetLeagueSeasonProcesser()
processer.Setup(this)
}
newSpider := spider.NewSpider(processer, "LeagueSeasonProcesser"+strconv.Itoa(i))
......
......@@ -51,6 +51,10 @@ func (this *MatchHisProcesser) Init() {
this.SUrl_Season = make(map[string]*pojo.LeagueSeason)
}
// Setup is the place where parameter values are set on this processer.
// The body is currently empty, so temp is not read.
func (this *MatchHisProcesser) Setup(temp *MatchHisProcesser) {
	// Set parameter values.
}
func (this *MatchHisProcesser) Startup() {
//1.获取所有的联赛赛季信息
......@@ -61,6 +65,7 @@ func (this *MatchHisProcesser) Startup() {
var processer *MatchHisProcesser
if i%1000 == 0 { //1000个比赛一个spider,一个赛季大概有30万场比赛,最多300条线程
processer = GetMatchHisProcesser()
processer.Setup(this)
}
newSpider := spider.NewSpider(processer, "MatchHisProcesser"+strconv.Itoa(i))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册