LeagueProcesser.go 3.8 KB
Newer Older
S
shi.zeyuan 已提交
1 2 3 4 5 6 7
package proc

import (
	"github.com/PuerkitoBio/goquery"
	"github.com/hu17889/go_spider/core/common/page"
	"github.com/hu17889/go_spider/core/pipeline"
	"github.com/hu17889/go_spider/core/spider"
M
monomania 已提交
8
	"strconv"
S
shi.zeyuan 已提交
9 10 11 12 13 14 15 16 17 18 19 20
	"strings"
	"tesou.io/platform/foot-parent/foot-api/common/base"
	entity2 "tesou.io/platform/foot-parent/foot-api/module/elem/pojo"
	service2 "tesou.io/platform/foot-parent/foot-core/module/elem/service"
	"tesou.io/platform/foot-parent/foot-spider/module/win007"
	"tesou.io/platform/foot-parent/foot-spider/module/win007/down"
)

type LeagueProcesser struct {
	service2.LeagueService
	service2.CompService
	//联赛数据
M
monomania 已提交
21 22 23
	league_list []*entity2.League
	sUrl_Id     map[string]string
	sUrl_Name   map[string]string
S
shi.zeyuan 已提交
24 25 26
}

func GetLeagueProcesser() *LeagueProcesser {
M
monomania 已提交
27 28 29
	processer := &LeagueProcesser{}
	processer.Init()
	return processer
S
shi.zeyuan 已提交
30 31
}

M
monomania 已提交
32 33 34 35 36 37 38
// Init resets the processer's working state: an empty (non-nil) league list
// and two empty URL lookup maps.
func (this *LeagueProcesser) Init() {
	this.sUrl_Id, this.sUrl_Name = map[string]string{}, map[string]string{}
	this.league_list = []*entity2.League{}
}

M
monomania 已提交
39 40 41
// Setup is the hook for copying parameter values from temp into this
// processer. It is currently a no-op: nothing is read from temp.
func (this *LeagueProcesser) Setup(temp *LeagueProcesser) {
	// set parameter values
}
M
monomania 已提交
42

S
shi.zeyuan 已提交
43 44 45 46
func (this *LeagueProcesser) Startup() {
	//sid 数据
	sid_stat_url := "http://m.win007.com/info.htm#section0";
	document, _ := GetDocument(sid_stat_url)
M
monomania 已提交
47 48

	var processer *LeagueProcesser
S
shi.zeyuan 已提交
49 50 51 52 53
	document.Find("a[href*='sid']").Each(func(i int, selection *goquery.Selection) {
		sUrl, _ := selection.Attr("href")
		sId := strings.Split(sUrl, "sid=")[1]
		sName := strings.TrimSpace(selection.Text())
		base.Log.Info("sId:", sId, ",sName:", sName, ",sUrl:"+sUrl)
M
monomania 已提交
54 55 56 57 58 59 60 61 62 63 64 65
		if len(sUrl) <= 0 {
			return
		}

		if i%100 == 0 { //100个联赛一个spider,总数1000多个联赛
			processer = GetLeagueProcesser()
			processer.Setup(this)
		}
		newSpider := spider.NewSpider(processer, "LeagueProcesser"+strconv.Itoa(i))

		processer.sUrl_Id[win007.WIN007_BASE_URL+sUrl] = sId
		processer.sUrl_Name[win007.WIN007_BASE_URL+sUrl] = sName
S
shi.zeyuan 已提交
66
		newSpider = newSpider.AddUrl(win007.WIN007_BASE_URL+sUrl, "html")
M
monomania 已提交
67 68 69 70
		newSpider.SetDownloader(down.NewMWin007Downloader())
		newSpider = newSpider.AddPipeline(pipeline.NewPipelineConsole())
		newSpider.SetSleepTime("rand", 1000, 20000)
		newSpider.SetThreadnum(1).Run()
S
shi.zeyuan 已提交
71
	})
M
monomania 已提交
72

S
shi.zeyuan 已提交
73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
}

func (this *LeagueProcesser) Process(p *page.Page) {
	request := p.GetRequest()
	if !p.IsSucc() {
		base.Log.Error("URL:,", request.Url, p.Errormsg())
		return
	}

	rawText := p.GetBodyStr()
	if rawText == "" {
		base.Log.Error("rawText:为空.url:", request.Url)
		return
	}

	sUrl := request.Url
	sId := this.sUrl_Id[sUrl]
	sName := this.sUrl_Name[sUrl]

	p.GetHtmlParser().Find("a.gameItem[href*='info'][href*='htm']").Each(func(i int, selection *goquery.Selection) {
		lUrl, _ := selection.Attr("href")
		l_arr := strings.Split(lUrl, "/")
		lId_suffix := l_arr[len(l_arr)-1]
M
monomania 已提交
96
		lId := strings.ReplaceAll(lId_suffix, ".htm", "")
S
shi.zeyuan 已提交
97 98 99 100 101 102 103
		lName := strings.TrimSpace(selection.Text())
		base.Log.Info("lId:", lId, ",lName:", lName, ",lUrl:"+lUrl)
		league := new(entity2.League)
		league.Id = lId
		league.Name = lName
		league.Sid = sId
		league.SName = sName
S
shi.zeyuan 已提交
104
		league.ShortUrl = lUrl
M
monomania 已提交
105
		if strings.Contains(lUrl, "Cup") {
M
monomania 已提交
106 107
			league.Cup = true
		}
M
monomania 已提交
108
		if strings.Contains(lUrl, "-") {
M
monomania 已提交
109 110 111
			league.SeasonCross = true
		}

M
monomania 已提交
112
		this.league_list = append(this.league_list, league)
S
shi.zeyuan 已提交
113 114 115 116 117 118 119 120 121 122 123 124 125 126
	})
}

func (this *LeagueProcesser) Finish() {
	base.Log.Info("联赛解析完成,执行入库 \r\n")

	league_list_slice := make([]interface{}, 0)
	league_modify_list_slice := make([]interface{}, 0)
	for _, v := range this.league_list {
		if nil == v {
			continue
		}
		exists := this.LeagueService.ExistById(v.Id)
		if exists {
M
monomania 已提交
127
			league_modify_list_slice = append(league_modify_list_slice, v)
S
shi.zeyuan 已提交
128 129 130 131 132 133 134
			continue
		}
		league_list_slice = append(league_list_slice, v)
	}
	this.LeagueService.SaveList(league_list_slice)
	this.LeagueService.ModifyList(league_modify_list_slice)
}