提交 1e93b039 编写于 作者: F feilong

refactor data dir

上级 6f0bde03
......@@ -23,7 +23,7 @@ config = {
},
"schema": {
"repo_github_user_info": {
"file": "../GitHub/Userinfo.xlsx",
"file": "../data/GitHub/Userinfo.xlsx",
"sheet_name": "repo",
"desc": "开源项目Github贡献者信息",
"fields": [
......@@ -100,7 +100,7 @@ config = {
]
},
"repo_github_info": {
"file": "../Github-Repos.xlsx",
"file": "../data/Github-Repos.xlsx",
"sheet_name": "汇总",
"desc": "开源项目在Github上的项目交互数据",
"fields": [
......@@ -162,7 +162,7 @@ config = {
]
},
"repo_csdn_trends": {
"file": "../CSDN/repo-csdn-trends.xlsx",
"file": "../data/CSDN/repo-csdn-trends.xlsx",
"sheet_name": "Sheet1",
"desc": "开源项目在CSDN站内指数数据",
"fields": [
......@@ -179,7 +179,7 @@ config = {
]
},
"repo_commit_rank": {
"file": "../CSDN/repo-commit-rank.csv",
"file": "../data/CSDN/repo-commit-rank.csv",
"desc": "开源项目在Github的月commit变化",
"fields": [
{
......@@ -205,7 +205,7 @@ config = {
]
},
"repo_github_active_trends": {
"file": "../PingCAP/项目活跃度变化.csv",
"file": "../data/PingCAP/项目活跃度变化.csv",
"desc": "开源项目在Github上的月活跃度数据",
"fields": [
{
......@@ -241,7 +241,7 @@ config = {
]
},
"repo_github_popular_trends": {
"file": "../PingCAP/项目受欢迎度变化.csv",
"file": "../data/PingCAP/项目受欢迎度变化.csv",
"desc": "开源项目在Github上的月收欢迎程度数据",
"fields": [
{
......
......@@ -6,38 +6,39 @@ import time
import re
# 读取json文件
with open("dataset/repo-list.json", 'r') as f:
with open("../data/dataset/repo-list.json", 'r') as f:
data = json.load(f)
with open('dataset/result.txt', 'r+') as file:
with open('../data/dataset/result.txt', 'r+') as file:
file.truncate(0)
for list_item in data:
# print(list_item["full_name"])
a = list_item["full_name"]
url = "https://api.github.com/repos/" + a
# url2 = "https://api.github.com/repos/" + a + "/contributors?per_page=1&anon=true"
payload={}
payload = {}
headers = {
'Authorization': '' # 这里填入你自己的 GitHub Personal Access Token
}
'Authorization': '' # 这里填入你自己的 GitHub Personal Access Token
}
response = requests.request("GET", url, headers=headers, data=payload)
# response2 = requests.request("GET", url2, headers=headers, data=payload)
# headers2 = requests.get(url2).headers
# print(headers2)
# pagesize = [int(s) for s in re.findall(r'\b\d+\b',headers2['Link'])]
# print(str(response.json()["id"])+ ',' + response.json()["owner"]["login"] + ',' + response.json()["full_name"] + ',' + str(response.json()["stargazers_count"]) + ',' + str(response.json()["forks_count"]) + ',' + str(response.json()["open_issues"]) + ',' + str(pagesize[-1]))
# with open('dataset/result.txt', 'a') as f:
# f.write(str(response.json()["id"])+ ',' + response.json()["owner"]["login"] + ',' + response.json()["full_name"] + ',' + str(response.json()["stargazers_count"]) + ',' + str(response.json()["forks_count"]) + ',' + str(pagesize[-1]) + "\n")
# time.sleep(4) # Sleep for 4 seconds
# print(str(response.json()["id"])+ ',' + response.json()["owner"]["login"] + ',' + response.json()["full_name"] + ',' + str(response.json()["stargazers_count"]) + ',' + str(response.json()["forks_count"]) + ',' + str(response.json()["open_issues"]))
with open('dataset/result.txt', 'a') as f:
f.write(str(response.json()["id"])+ ',' + response.json()["owner"]["login"] + ',' + response.json()["full_name"] + ',' + str(response.json()["stargazers_count"]) + ',' + str(response.json()["forks_count"]) + "\n")
time.sleep(2) # Sleep for 2 seconds
\ No newline at end of file
with open('../data/dataset/result.txt', 'a') as f:
f.write(str(response.json()["id"]) + ',' + response.json()["owner"]["login"] + ',' + response.json()[
"full_name"] + ',' + str(response.json()["stargazers_count"]) + ',' + str(response.json()["forks_count"]) + "\n")
time.sleep(2) # Sleep for 2 seconds
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册