diff --git a/CSDN/Processor.jpg b/data/CSDN/Processor.jpg similarity index 100% rename from CSDN/Processor.jpg rename to data/CSDN/Processor.jpg diff --git a/CSDN/repo-commit-rank.csv b/data/CSDN/repo-commit-rank.csv similarity index 100% rename from CSDN/repo-commit-rank.csv rename to data/CSDN/repo-commit-rank.csv diff --git a/CSDN/repo-csdn-trends.xlsx b/data/CSDN/repo-csdn-trends.xlsx similarity index 100% rename from CSDN/repo-csdn-trends.xlsx rename to data/CSDN/repo-csdn-trends.xlsx diff --git a/CSDN/repository_commit_events_20200930_analysis_new-demo.csv b/data/CSDN/repository_commit_events_20200930_analysis_new-demo.csv similarity index 100% rename from CSDN/repository_commit_events_20200930_analysis_new-demo.csv rename to data/CSDN/repository_commit_events_20200930_analysis_new-demo.csv diff --git a/GitHub-Repos.xlsx b/data/GitHub-Repos.xlsx similarity index 100% rename from GitHub-Repos.xlsx rename to data/GitHub-Repos.xlsx diff --git a/GitHub/UserInfo.xlsx b/data/GitHub/UserInfo.xlsx similarity index 100% rename from GitHub/UserInfo.xlsx rename to data/GitHub/UserInfo.xlsx diff --git "a/OSS\346\225\260\346\215\256\351\234\200\346\261\202.xlsx" "b/data/OSS\346\225\260\346\215\256\351\234\200\346\261\202.xlsx" similarity index 100% rename from "OSS\346\225\260\346\215\256\351\234\200\346\261\202.xlsx" rename to "data/OSS\346\225\260\346\215\256\351\234\200\346\261\202.xlsx" diff --git "a/PingCAP/\350\277\22124\344\270\252\346\234\210\347\232\204\346\264\273\350\267\203\346\225\260\346\215\256\346\200\273\346\225\260.csv" "b/data/PingCAP/\350\277\22124\344\270\252\346\234\210\347\232\204\346\264\273\350\267\203\346\225\260\346\215\256\346\200\273\346\225\260.csv" similarity index 100% rename from "PingCAP/\350\277\22124\344\270\252\346\234\210\347\232\204\346\264\273\350\267\203\346\225\260\346\215\256\346\200\273\346\225\260.csv" rename to 
"data/PingCAP/\350\277\22124\344\270\252\346\234\210\347\232\204\346\264\273\350\267\203\346\225\260\346\215\256\346\200\273\346\225\260.csv" diff --git "a/PingCAP/\351\241\271\347\233\256\345\217\227\346\254\242\350\277\216\345\272\246\345\217\230\345\214\226.csv" "b/data/PingCAP/\351\241\271\347\233\256\345\217\227\346\254\242\350\277\216\345\272\246\345\217\230\345\214\226.csv" similarity index 100% rename from "PingCAP/\351\241\271\347\233\256\345\217\227\346\254\242\350\277\216\345\272\246\345\217\230\345\214\226.csv" rename to "data/PingCAP/\351\241\271\347\233\256\345\217\227\346\254\242\350\277\216\345\272\246\345\217\230\345\214\226.csv" diff --git "a/PingCAP/\351\241\271\347\233\256\346\264\273\350\267\203\345\272\246\345\217\230\345\214\226.csv" "b/data/PingCAP/\351\241\271\347\233\256\346\264\273\350\267\203\345\272\246\345\217\230\345\214\226.csv" similarity index 100% rename from "PingCAP/\351\241\271\347\233\256\346\264\273\350\267\203\345\272\246\345\217\230\345\214\226.csv" rename to "data/PingCAP/\351\241\271\347\233\256\346\264\273\350\267\203\345\272\246\345\217\230\345\214\226.csv" diff --git a/RepoType-OSChina.xlsx b/data/RepoType-OSChina.xlsx similarity index 100% rename from RepoType-OSChina.xlsx rename to data/RepoType-OSChina.xlsx diff --git a/RepoType.xlsx b/data/RepoType.xlsx similarity index 100% rename from RepoType.xlsx rename to data/RepoType.xlsx diff --git a/dataset/cncf-interactive-landscape.csv b/data/dataset/cncf-interactive-landscape.csv similarity index 100% rename from dataset/cncf-interactive-landscape.csv rename to data/dataset/cncf-interactive-landscape.csv diff --git a/dataset/interactive_landscape.csv b/data/dataset/interactive_landscape.csv similarity index 100% rename from dataset/interactive_landscape.csv rename to data/dataset/interactive_landscape.csv diff --git a/dataset/linuxfoundation-interactive-landscape.csv b/data/dataset/linuxfoundation-interactive-landscape.csv similarity index 100% rename from 
dataset/linuxfoundation-interactive-landscape.csv rename to data/dataset/linuxfoundation-interactive-landscape.csv diff --git a/dataset/repo-list.json b/data/dataset/repo-list.json similarity index 100% rename from dataset/repo-list.json rename to data/dataset/repo-list.json diff --git a/src/main.py b/src/main.py index f5ab7ed4de9b9a453a7aa2cdba09dd3da94a23a9..c09911921d2fc7a7c51eadd1b1de885d99c87286 100644 --- a/src/main.py +++ b/src/main.py @@ -23,7 +23,7 @@ config = { }, "schema": { "repo_github_user_info": { - "file": "../GitHub/Userinfo.xlsx", + "file": "../data/GitHub/UserInfo.xlsx", "sheet_name": "repo", "desc": "开源项目Github贡献者信息", "fields": [ @@ -100,7 +100,7 @@ config = { ] }, "repo_github_info": { - "file": "../Github-Repos.xlsx", + "file": "../data/GitHub-Repos.xlsx", "sheet_name": "汇总", "desc": "开源项目在Github上的项目交互数据", "fields": [ @@ -162,7 +162,7 @@ config = { ] }, "repo_csdn_trends": { - "file": "../CSDN/repo-csdn-trends.xlsx", + "file": "../data/CSDN/repo-csdn-trends.xlsx", "sheet_name": "Sheet1", "desc": "开源项目在CSDN站内指数数据", "fields": [ @@ -179,7 +179,7 @@ config = { ] }, "repo_commit_rank": { - "file": "../CSDN/repo-commit-rank.csv", + "file": "../data/CSDN/repo-commit-rank.csv", "desc": "开源项目在Github的月commit变化", "fields": [ { @@ -205,7 +205,7 @@ config = { ] }, "repo_github_active_trends": { - "file": "../PingCAP/项目活跃度变化.csv", + "file": "../data/PingCAP/项目活跃度变化.csv", "desc": "开源项目在Github上的月活跃度数据", "fields": [ { @@ -241,7 +241,7 @@ config = { ] }, "repo_github_popular_trends": { - "file": "../PingCAP/项目受欢迎度变化.csv", + "file": "../data/PingCAP/项目受欢迎度变化.csv", "desc": "开源项目在Github上的月收欢迎程度数据", "fields": [ { diff --git a/src/repoinfo.py b/src/repoinfo.py index 376f611fb5727ad6b5fc4d800bba80148c4ce13c..efaf7d8b300628a28e3774c9c444083e6cd41274 100644 --- a/src/repoinfo.py +++ b/src/repoinfo.py @@ -6,38 +6,39 @@ import time import re # 读取json文件 -with open("dataset/repo-list.json", 'r') as f: +with open("../data/dataset/repo-list.json", 'r') as f: data = 
json.load(f) -with open('dataset/result.txt', 'r+') as file: +with open('../data/dataset/result.txt', 'w') as file: file.truncate(0) for list_item in data: # print(list_item["full_name"]) a = list_item["full_name"] - + url = "https://api.github.com/repos/" + a # url2 = "https://api.github.com/repos/" + a + "/contributors?per_page=1&anon=true" - payload={} + payload = {} headers = { - 'Authorization': '' # 这里填入你自己的 GitHub Personal Access Token - } - + 'Authorization': '' # 这里填入你自己的 GitHub Personal Access Token + } + response = requests.request("GET", url, headers=headers, data=payload) # response2 = requests.request("GET", url2, headers=headers, data=payload) - + # headers2 = requests.get(url2).headers # print(headers2) # pagesize = [int(s) for s in re.findall(r'\b\d+\b',headers2['Link'])] - + # print(str(response.json()["id"])+ ',' + response.json()["owner"]["login"] + ',' + response.json()["full_name"] + ',' + str(response.json()["stargazers_count"]) + ',' + str(response.json()["forks_count"]) + ',' + str(response.json()["open_issues"]) + ',' + str(pagesize[-1])) # with open('dataset/result.txt', 'a') as f: # f.write(str(response.json()["id"])+ ',' + response.json()["owner"]["login"] + ',' + response.json()["full_name"] + ',' + str(response.json()["stargazers_count"]) + ',' + str(response.json()["forks_count"]) + ',' + str(pagesize[-1]) + "\n") # time.sleep(4) # Sleep for 2 seconds # print(str(response.json()["id"])+ ',' + response.json()["owner"]["login"] + ',' + response.json()["full_name"] + ',' + str(response.json()["stargazers_count"]) + ',' + str(response.json()["forks_count"]) + ',' + str(response.json()["open_issues"])) - with open('dataset/result.txt', 'a') as f: - f.write(str(response.json()["id"])+ ',' + response.json()["owner"]["login"] + ',' + response.json()["full_name"] + ',' + str(response.json()["stargazers_count"]) + ',' + str(response.json()["forks_count"]) + "\n") - time.sleep(2) # Sleep for 2 seconds \ No newline at end of file + with 
open('../data/dataset/result.txt', 'a') as f: + f.write(str(response.json()["id"]) + ',' + response.json()["owner"]["login"] + ',' + response.json()[ + "full_name"] + ',' + str(response.json()["stargazers_count"]) + ',' + str(response.json()["forks_count"]) + "\n") + time.sleep(2) # Sleep for 2 seconds