# Browser-like User-Agent sent with every request to www.ipaddress.com.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
}

# XPath selecting the <li> elements whose text is read as the host's addresses.
# Earlier, more specific variants that were tried before this one:
#   /html/body/div[1]/main/section[3]/div[2]/table/tbody/tr[6]/td/ul/li[1]
#   /html/body/div[1]/main/section[3]/div/table/tbody/tr[6]/td/ul/li[1]
#   /html/body/div[1]/main/section[2]/div[1]/table/tbody/tr[6]/td/ul/li[1]
ip_xpath: str = "/html/body/div[1]/main/section/div/table/tbody/tr[6]/td/ul/li"
HTTP_OK: int = 200


def fetch_ip_from__www_ipaddress_com(host: str) -> List[str]:
    """Scrape the addresses listed for *host* on www.ipaddress.com.

    The page ``https://www.ipaddress.com/site/{host}`` is requested until it
    answers with HTTP 200; every failed attempt (timeout, connection error or
    non-200 status) is logged and retried after a 3 second pause, so this
    function blocks until the site answers successfully.

    :param host: host name to look up, e.g. ``github.global.ssl.fastly.net``
    :return: stripped text of every element matched by ``ip_xpath``;
        elements without text are skipped. The list is empty when the
        XPath matches nothing.
    """
    url: str = f"https://www.ipaddress.com/site/{host}"
    while True:
        try:
            response = requests.get(url=url, headers=headers, timeout=5)
        except (requests.exceptions.Timeout,
                requests.exceptions.ConnectionError) as e:
            # Connect timeouts and dropped connections are as transient as
            # read timeouts; retry them instead of aborting the whole run.
            print(f"{url},{e},sleep 3s,continue")
            sleep(3)
            continue
        if response.status_code == HTTP_OK:
            break
        print(f"retry because http_response.status_code: {url}, {response.status_code}")
        # Pause before retrying so an error status does not turn into a
        # tight request loop against the site.
        sleep(3)

    dom = etree.HTML(response.text)
    nodes = dom.xpath(ip_xpath)
    # ``.text`` is None for an element without leading text; skip those so
    # callers only ever receive strings.
    _text_ls: List[str] = [node.text.strip() for node in nodes if node.text]
    print(f"{url},{_text_ls}")
    return _text_ls


# Manual check:
# if __name__ == "__main__":
#     host_ls: List[str] = fetch_ip_from__www_ipaddress_com("github.global.ssl.fastly.net")
def iterateHostK(k, hostK, _ipKLs)->str:
    """Build the hosts-file line for one host.

    :param k: index of the host in the host list (not used by this function;
        kept so existing callers keep working)
    :param hostK: host name the line is written for
    :param _ipKLs: addresses scraped for ``hostK``; may contain IPv6
        addresses and ``None``/empty entries, all of which are ignored
    :return:
        * ``"#<host>"`` when no IPv4 address is left,
        * ``"<ip> <host> "`` when exactly one IPv4 address is left,
        * ``"<fastest ip> <host> #<other ips>"`` otherwise, where the
          fastest address is the one with the smallest value returned by
          ``accessIpRootPathWebNanoSeconds``.
    """
    # Keep IPv4 only: anything containing ":" is treated as IPv6. Entries
    # that are None or empty are dropped first so the membership test
    # cannot raise TypeError.
    ipv4_ls = [ip for ip in (_ipKLs or []) if ip and ":" not in ip]

    if not ipv4_ls:
        return f'#{hostK}'
    if len(ipv4_ls) == 1:
        # Nothing to compare against, so no latency probe is needed.
        return f"{ipv4_ls[0]} {hostK} "

    ns_ls = [accessIpRootPathWebNanoSeconds(ipJ) for ipJ in ipv4_ls]
    j, _ns = findMiniValueWithIdx(ns_ls)
    print(f"{hostK}:fast ip is :in idx {j},{ipv4_ls[j]},{_ns}ns")
    # The remaining candidates are kept as a trailing comment on the line.
    others = ipv4_ls[:j] + ipv4_ls[j + 1:]
    return f"{ipv4_ls[j]} {hostK} #{' '.join(others)}"
"avatars3.githubusercontent.com", - "avatars2.githubusercontent.com", - "avatars1.githubusercontent.com", - "avatars0.githubusercontent.com", - "avatars.githubusercontent.com", - "codeload.github.com", - "github-cloud.s3.amazonaws.com", - "github-com.s3.amazonaws.com", - "github-production-release-asset-2e65be.s3.amazonaws.com", - "github-production-user-asset-6210df.s3.amazonaws.com", - "github-production-repository-file-5c1aeb.s3.amazonaws.com", - "githubstatus.com", - "github.community", - "media.githubusercontent.com", - "objects.githubusercontent.com" - ] - ip_ls=[fetch_ip(hostK) for hostK in host_ls] - + ip_ls=[fetch_ip_from__www_ipaddress_com(hostK) for hostK in host_ls] line_ls=[iterateHostK(k, hostK, ip_ls[k]) for k,hostK in enumerate(host_ls)] text="\n".join(line_ls) with open("./hosts","w") as f: f.write(text) diff --git a/host_list_config.py b/host_list_config.py new file mode 100644 index 0000000..a8a8a56 --- /dev/null +++ b/host_list_config.py @@ -0,0 +1,35 @@ +from typing import List + +host_ls: List[str] = [ + "github.githubassets.com", + "central.github.com", + "desktop.githubusercontent.com", + "assets-cdn.github.com", + "camo.githubusercontent.com", + "github.map.fastly.net", + "github.global.ssl.fastly.net", + "gist.github.com", + "github.io", + "github.com", + "api.github.com", + "raw.githubusercontent.com", + "user-images.githubusercontent.com", + "favicons.githubusercontent.com", + "avatars5.githubusercontent.com", + "avatars4.githubusercontent.com", + "avatars3.githubusercontent.com", + "avatars2.githubusercontent.com", + "avatars1.githubusercontent.com", + "avatars0.githubusercontent.com", + "avatars.githubusercontent.com", + "codeload.github.com", + "github-cloud.s3.amazonaws.com", + "github-com.s3.amazonaws.com", + "github-production-release-asset-2e65be.s3.amazonaws.com", + "github-production-user-asset-6210df.s3.amazonaws.com", + "github-production-repository-file-5c1aeb.s3.amazonaws.com", + "githubstatus.com", + "github.community", + 
"media.githubusercontent.com", + "objects.githubusercontent.com" +] \ No newline at end of file diff --git a/hosts b/hosts index f26b689..8c75a2a 100644 --- a/hosts +++ b/hosts @@ -1,31 +1,31 @@ -185.199.108.154 github.githubassets.com #185.199.109.154 185.199.110.154 185.199.111.154 +185.199.109.154 github.githubassets.com #185.199.108.154 185.199.110.154 185.199.111.154 140.82.112.21 central.github.com -185.199.108.133 desktop.githubusercontent.com #185.199.109.133 185.199.110.133 185.199.111.133 -185.199.108.153 assets-cdn.github.com #185.199.110.153 185.199.111.153 185.199.109.153 2606:50c0:8000::153 2606:50c0:8001::153 2606:50c0:8002::153 2606:50c0:8003::153 -185.199.108.133 camo.githubusercontent.com #185.199.109.133 185.199.110.133 185.199.111.133 +185.199.111.133 desktop.githubusercontent.com #185.199.108.133 185.199.109.133 185.199.110.133 +185.199.111.153 assets-cdn.github.com #185.199.108.153 185.199.110.153 185.199.109.153 +185.199.111.133 camo.githubusercontent.com #185.199.108.133 185.199.109.133 185.199.110.133 185.199.108.133 github.map.fastly.net #185.199.109.133 185.199.110.133 185.199.111.133 -151.101.1.194 github.global.ssl.fastly.net #151.101.65.194 151.101.129.194 151.101.193.194 +151.101.193.194 github.global.ssl.fastly.net #151.101.1.194 151.101.65.194 151.101.129.194 140.82.112.4 gist.github.com -185.199.108.153 github.io #185.199.110.153 185.199.111.153 185.199.109.153 +185.199.109.153 github.io #185.199.108.153 185.199.110.153 185.199.111.153 140.82.114.3 github.com 140.82.114.6 api.github.com -185.199.108.133 raw.githubusercontent.com #185.199.109.133 185.199.110.133 185.199.111.133 2606:50c0:8000::154 2606:50c0:8001::154 2606:50c0:8002::154 2606:50c0:8003::154 -185.199.108.133 user-images.githubusercontent.com #185.199.109.133 185.199.110.133 185.199.111.133 2606:50c0:8000::154 2606:50c0:8001::154 2606:50c0:8002::154 2606:50c0:8003::154 -185.199.108.133 favicons.githubusercontent.com #185.199.109.133 185.199.110.133 185.199.111.133 
-185.199.108.133 avatars5.githubusercontent.com #185.199.109.133 185.199.110.133 185.199.111.133 -185.199.108.133 avatars4.githubusercontent.com #185.199.109.133 185.199.110.133 185.199.111.133 -185.199.108.133 avatars3.githubusercontent.com #185.199.109.133 185.199.110.133 185.199.111.133 -185.199.108.133 avatars2.githubusercontent.com #185.199.109.133 185.199.110.133 185.199.111.133 -185.199.108.133 avatars1.githubusercontent.com #185.199.109.133 185.199.110.133 185.199.111.133 -185.199.108.133 avatars0.githubusercontent.com #185.199.109.133 185.199.110.133 185.199.111.133 -185.199.108.133 avatars.githubusercontent.com #185.199.109.133 185.199.110.133 185.199.111.133 2606:50c0:8000::154 2606:50c0:8001::154 2606:50c0:8002::154 2606:50c0:8003::154 +185.199.111.133 raw.githubusercontent.com #185.199.108.133 185.199.109.133 185.199.110.133 +185.199.111.133 user-images.githubusercontent.com #185.199.108.133 185.199.109.133 185.199.110.133 +185.199.109.133 favicons.githubusercontent.com #185.199.108.133 185.199.110.133 185.199.111.133 +185.199.111.133 avatars5.githubusercontent.com #185.199.108.133 185.199.109.133 185.199.110.133 +185.199.109.133 avatars4.githubusercontent.com #185.199.108.133 185.199.110.133 185.199.111.133 +185.199.110.133 avatars3.githubusercontent.com #185.199.108.133 185.199.109.133 185.199.111.133 +185.199.111.133 avatars2.githubusercontent.com #185.199.108.133 185.199.109.133 185.199.110.133 +185.199.111.133 avatars1.githubusercontent.com #185.199.108.133 185.199.109.133 185.199.110.133 +185.199.109.133 avatars0.githubusercontent.com #185.199.108.133 185.199.110.133 185.199.111.133 +185.199.111.133 avatars.githubusercontent.com #185.199.108.133 185.199.109.133 185.199.110.133 140.82.114.9 codeload.github.com -52.216.25.84 github-cloud.s3.amazonaws.com #52.216.44.65 52.216.83.64 52.217.12.28 52.217.32.172 52.217.48.36 52.217.72.4 54.231.169.1 -3.5.16.11 github-com.s3.amazonaws.com #52.216.42.249 52.216.244.84 52.217.167.105 52.217.197.1 
52.217.198.241 54.231.169.113 54.231.169.145 -52.216.8.171 github-production-release-asset-2e65be.s3.amazonaws.com #52.216.131.187 52.216.136.236 52.216.170.11 52.217.18.76 52.217.64.156 52.217.235.49 54.231.233.225 -3.5.16.183 github-production-user-asset-6210df.s3.amazonaws.com #3.5.17.0 52.216.110.51 52.216.200.67 52.216.237.139 52.217.104.132 52.217.162.81 54.231.193.209 +54.231.169.1 github-cloud.s3.amazonaws.com #52.216.25.84 52.216.44.65 52.216.83.64 52.217.12.28 52.217.32.172 52.217.48.36 52.217.72.4 +54.231.169.145 github-com.s3.amazonaws.com #3.5.16.11 52.216.42.249 52.216.244.84 52.217.167.105 52.217.197.1 52.217.198.241 54.231.169.113 +54.231.233.225 github-production-release-asset-2e65be.s3.amazonaws.com #52.216.8.171 52.216.131.187 52.216.136.236 52.216.170.11 52.217.18.76 52.217.64.156 52.217.235.49 +52.217.104.132 github-production-user-asset-6210df.s3.amazonaws.com #3.5.16.183 3.5.17.0 52.216.110.51 52.216.200.67 52.216.237.139 52.217.162.81 54.231.193.209 3.5.6.186 github-production-repository-file-5c1aeb.s3.amazonaws.com #52.216.33.33 52.216.60.89 52.217.44.156 52.217.105.140 54.231.128.65 54.231.133.209 54.231.161.81 185.199.108.153 githubstatus.com #185.199.110.153 185.199.111.153 185.199.109.153 140.82.114.17 github.community -185.199.108.133 media.githubusercontent.com #185.199.109.133 185.199.110.133 185.199.111.133 2606:50c0:8000::154 2606:50c0:8001::154 2606:50c0:8002::154 2606:50c0:8003::154 -185.199.108.133 objects.githubusercontent.com #185.199.109.133 185.199.110.133 185.199.111.133 \ No newline at end of file +185.199.108.133 media.githubusercontent.com #185.199.109.133 185.199.110.133 185.199.111.133 +185.199.109.133 objects.githubusercontent.com #185.199.108.133 185.199.110.133 185.199.111.133 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..a3e328b --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +lxml>=4.0.0 +requests>=2.12.5 diff --git a/util.py b/util.py new 
# Latency reported for an address that could not be reached, chosen so that
# such an address never beats one that actually answered.
_UNREACHABLE_NS: int = 2 ** 63 - 1


def accessIpRootPathWebNanoSeconds(ipJ: str, timeout: float = 5.0) -> int:
    """Measure how long an HTTP GET of ``http://<ipJ>`` takes.

    :param ipJ: IP address to probe
    :param timeout: seconds to wait for the server before giving up
    :return: elapsed time of the request in nanoseconds, or
        ``_UNREACHABLE_NS`` when the request fails or times out
    """
    # Short pause before every probe -- presumably to pace consecutive
    # requests; the original code does not state the reason.
    time.sleep(0.1)
    # perf_counter_ns is monotonic, so the measured interval cannot be
    # distorted by wall-clock adjustments.
    begin: int = time.perf_counter_ns()
    try:
        # Without a timeout requests waits indefinitely on a silent host.
        requests.get(f"http://{ipJ}", timeout=timeout)
    except requests.exceptions.RequestException:
        # One dead address must not abort the whole run; rank it last.
        return _UNREACHABLE_NS
    return time.perf_counter_ns() - begin


def findMiniValueWithIdx(ls: List[Any]) -> Tuple[int, Any]:
    """Return ``(index, value)`` of the smallest element of *ls*.

    When the minimum occurs more than once, the first occurrence wins.

    :param ls: non-empty list of mutually comparable values
    :return: tuple of the index of the minimum and the minimum itself
    :raises ValueError: if *ls* is ``None`` or empty
    """
    if not ls:
        raise ValueError("findMiniValueWithIdx() requires a non-empty list")
    # min() over the indices keyed by the element keeps index and value in
    # sync and returns the first index on ties.
    min_idx = min(range(len(ls)), key=ls.__getitem__)
    return min_idx, ls[min_idx]