提交 3f72bb3c 编写于 作者: Miykael_xxm's avatar Miykael_xxm 🚴

add repo info crawler python script

上级 ca6463b2
# -*- coding: utf-8 -*-
from os import link
import requests
import json
import time
import re
# Crawl basic statistics for a list of GitHub repositories.
#
# Reads the repository list from REPO_LIST_PATH, queries the GitHub REST API
# once per repository and writes one comma-separated line per repository
# (id, owner login, full name, stars, forks) to RESULT_PATH.

# Input file: a JSON array of objects, each carrying a "full_name" key
# that is appended to API_ROOT to form the request URL.
REPO_LIST_PATH = "dataset/repo-list.json"
# Output file: recreated from scratch on every run.
RESULT_PATH = 'dataset/result.txt'
API_ROOT = "https://api.github.com/repos/"
# Fill in your own GitHub Personal Access Token here.
GITHUB_AUTHORIZATION = ''
# Seconds to wait for the API before giving up on a single request.
REQUEST_TIMEOUT = 30
# Seconds to pause after each request so the crawl stays slow and polite.
REQUEST_INTERVAL = 2


def build_headers(authorization=GITHUB_AUTHORIZATION):
    """Return the HTTP headers for an API request.

    The Authorization header is only included when a non-empty value is
    configured, so an unconfigured script sends a plain anonymous request
    instead of an empty credential.
    """
    if not authorization:
        return {}
    return {'Authorization': authorization}


def format_repo_row(repo):
    """Return the result line for one repository.

    Args:
        repo: decoded JSON object of a repository as returned by the API.

    Returns:
        "id,owner_login,full_name,stargazers_count,forks_count" followed by
        a newline.

    Raises:
        KeyError: if one of the expected fields is missing.
        TypeError: if "owner" is not a mapping.
    """
    fields = (
        repo["id"],
        repo["owner"]["login"],
        repo["full_name"],
        repo["stargazers_count"],
        repo["forks_count"],
    )
    return ','.join(str(field) for field in fields) + "\n"


def fetch_repo(full_name, headers):
    """Fetch and decode the API description of the repository *full_name*.

    Raises:
        requests.RequestException: on network failure, timeout or a non-2xx
            status (for example 404 for a missing repository).
        ValueError: if the response body is not valid JSON.
    """
    response = requests.get(
        API_ROOT + full_name, headers=headers, timeout=REQUEST_TIMEOUT
    )
    response.raise_for_status()
    # Decode the body once; every field of the row comes from this object.
    return response.json()


def main():
    """Crawl every repository in REPO_LIST_PATH and write RESULT_PATH."""
    import sys

    # Read the JSON file
    with open(REPO_LIST_PATH, 'r', encoding='utf-8') as f:
        repos = json.load(f)

    headers = build_headers()
    # Mode 'w' both creates the file on a first run and empties a previous
    # result, so no separate truncate step is needed.
    with open(RESULT_PATH, 'w', encoding='utf-8') as out:
        for list_item in repos:
            full_name = list_item["full_name"]
            try:
                row = format_repo_row(fetch_repo(full_name, headers))
            except (requests.RequestException, KeyError, TypeError,
                    ValueError) as exc:
                # One bad repository must not abort the whole crawl.
                print("skipping %s: %s" % (full_name, exc), file=sys.stderr)
            else:
                out.write(row)
                # Flush per row so partial results survive an interruption.
                out.flush()
            time.sleep(REQUEST_INTERVAL)  # Sleep for 2 seconds


if __name__ == "__main__":
    main()
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册