LeagueProcesser.go 4.2 KB
Newer Older
S
shi.zeyuan 已提交
1 2 3 4 5 6 7
package proc

import (
	"github.com/PuerkitoBio/goquery"
	"github.com/hu17889/go_spider/core/common/page"
	"github.com/hu17889/go_spider/core/pipeline"
	"github.com/hu17889/go_spider/core/spider"
M
monomania 已提交
8
	"strconv"
S
shi.zeyuan 已提交
9 10 11 12 13 14 15 16 17 18 19
	"strings"
	"tesou.io/platform/foot-parent/foot-api/common/base"
	entity2 "tesou.io/platform/foot-parent/foot-api/module/elem/pojo"
	service2 "tesou.io/platform/foot-parent/foot-core/module/elem/service"
	"tesou.io/platform/foot-parent/foot-spider/module/win007"
	"tesou.io/platform/foot-parent/foot-spider/module/win007/down"
)

type LeagueProcesser struct {
	service2.LeagueService
	service2.CompService
S
1.xx  
shi.zeyuan 已提交
20 21
	//是否是单线程
	SingleThread bool
S
shi.zeyuan 已提交
22
	//联赛数据
M
monomania 已提交
23 24 25
	league_list []*entity2.League
	sUrl_Id     map[string]string
	sUrl_Name   map[string]string
S
shi.zeyuan 已提交
26 27 28
}

func GetLeagueProcesser() *LeagueProcesser {
M
monomania 已提交
29 30 31
	processer := &LeagueProcesser{}
	processer.Init()
	return processer
S
shi.zeyuan 已提交
32 33
}

M
monomania 已提交
34 35 36 37 38 39 40
// Init resets the processer's working state: an empty (non-nil) league list
// and empty URL->sid and URL->name lookup maps.
func (this *LeagueProcesser) Init() {
	this.league_list = []*entity2.League{}
	this.sUrl_Id = map[string]string{}
	this.sUrl_Name = map[string]string{}
}

M
monomania 已提交
41 42 43
// Setup receives a template processer but its body is currently empty, so
// calling it changes nothing.
// NOTE(review): presumably meant to copy settings from temp onto this — confirm.
func (this *LeagueProcesser) Setup(temp *LeagueProcesser) {
	// Set parameter values (nothing is set at the moment).
}
M
monomania 已提交
44

S
shi.zeyuan 已提交
45 46 47 48
func (this *LeagueProcesser) Startup() {
	//sid 数据
	sid_stat_url := "http://m.win007.com/info.htm#section0";
	document, _ := GetDocument(sid_stat_url)
M
monomania 已提交
49 50

	var processer *LeagueProcesser
M
monomania 已提交
51
	var newSpider *spider.Spider
S
shi.zeyuan 已提交
52 53 54 55 56
	document.Find("a[href*='sid']").Each(func(i int, selection *goquery.Selection) {
		sUrl, _ := selection.Attr("href")
		sId := strings.Split(sUrl, "sid=")[1]
		sName := strings.TrimSpace(selection.Text())
		base.Log.Info("sId:", sId, ",sName:", sName, ",sUrl:"+sUrl)
M
monomania 已提交
57 58 59 60
		if len(sUrl) <= 0 {
			return
		}

M
monomania 已提交
61
		if i%10 == 0 { //10个联赛一个spider,总数1000多个联赛,最多100spider
M
monomania 已提交
62 63
			processer = GetLeagueProcesser()
			processer.Setup(this)
M
monomania 已提交
64
			newSpider = spider.NewSpider(processer, "LeagueProcesser"+strconv.Itoa(i))
M
monomania 已提交
65 66 67 68
		}

		processer.sUrl_Id[win007.WIN007_BASE_URL+sUrl] = sId
		processer.sUrl_Name[win007.WIN007_BASE_URL+sUrl] = sName
S
shi.zeyuan 已提交
69
		newSpider = newSpider.AddUrl(win007.WIN007_BASE_URL+sUrl, "html")
M
monomania 已提交
70 71 72
		if i%10 == 0 { //10个联赛一个spider,总数1000多个联赛,最多100spider
			newSpider.SetDownloader(down.NewMWin007Downloader())
			newSpider = newSpider.AddPipeline(pipeline.NewPipelineConsole())
M
monomania 已提交
73
			newSpider.SetSleepTime("rand", win007.SLEEP_RAND_S, win007.SLEEP_RAND_E)
M
monomania 已提交
74 75
			newSpider.SetThreadnum(1).Run()
		}
S
shi.zeyuan 已提交
76
	})
M
monomania 已提交
77

M
monomania 已提交
78 79
	newSpider.SetDownloader(down.NewMWin007Downloader())
	newSpider = newSpider.AddPipeline(pipeline.NewPipelineConsole())
M
monomania 已提交
80
	newSpider.SetSleepTime("rand", win007.SLEEP_RAND_S, win007.SLEEP_RAND_E)
M
monomania 已提交
81 82
	newSpider.SetThreadnum(1).Run()

S
shi.zeyuan 已提交
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
}

func (this *LeagueProcesser) Process(p *page.Page) {
	request := p.GetRequest()
	if !p.IsSucc() {
		base.Log.Error("URL:,", request.Url, p.Errormsg())
		return
	}

	rawText := p.GetBodyStr()
	if rawText == "" {
		base.Log.Error("rawText:为空.url:", request.Url)
		return
	}

	sUrl := request.Url
	sId := this.sUrl_Id[sUrl]
	sName := this.sUrl_Name[sUrl]

	p.GetHtmlParser().Find("a.gameItem[href*='info'][href*='htm']").Each(func(i int, selection *goquery.Selection) {
		lUrl, _ := selection.Attr("href")
		l_arr := strings.Split(lUrl, "/")
		lId_suffix := l_arr[len(l_arr)-1]
M
monomania 已提交
106
		lId := strings.ReplaceAll(lId_suffix, ".htm", "")
S
shi.zeyuan 已提交
107 108 109 110 111 112 113
		lName := strings.TrimSpace(selection.Text())
		base.Log.Info("lId:", lId, ",lName:", lName, ",lUrl:"+lUrl)
		league := new(entity2.League)
		league.Id = lId
		league.Name = lName
		league.Sid = sId
		league.SName = sName
S
shi.zeyuan 已提交
114
		league.ShortUrl = lUrl
M
monomania 已提交
115
		if strings.Contains(lUrl, "Cup") {
M
monomania 已提交
116 117
			league.Cup = true
		}
M
monomania 已提交
118
		if strings.Contains(lUrl, "-") {
M
monomania 已提交
119 120 121
			league.SeasonCross = true
		}

M
monomania 已提交
122
		this.league_list = append(this.league_list, league)
S
shi.zeyuan 已提交
123 124 125 126 127 128 129 130 131 132 133 134 135 136
	})
}

func (this *LeagueProcesser) Finish() {
	base.Log.Info("联赛解析完成,执行入库 \r\n")

	league_list_slice := make([]interface{}, 0)
	league_modify_list_slice := make([]interface{}, 0)
	for _, v := range this.league_list {
		if nil == v {
			continue
		}
		exists := this.LeagueService.ExistById(v.Id)
		if exists {
M
monomania 已提交
137
			league_modify_list_slice = append(league_modify_list_slice, v)
S
shi.zeyuan 已提交
138 139 140 141 142 143 144
			continue
		}
		league_list_slice = append(league_list_slice, v)
	}
	this.LeagueService.SaveList(league_list_slice)
	this.LeagueService.ModifyList(league_modify_list_slice)
}