From 1e93b039333d44b32bf7d742d68732ec6ee55962 Mon Sep 17 00:00:00 2001 From: feilong Date: Thu, 13 Oct 2022 22:55:24 +0800 Subject: [PATCH] refactor data dir --- {CSDN => data/CSDN}/Processor.jpg | Bin {CSDN => data/CSDN}/repo-commit-rank.csv | 0 {CSDN => data/CSDN}/repo-csdn-trends.xlsx | Bin ...mmit_events_20200930_analysis_new-demo.csv | 0 GitHub-Repos.xlsx => data/GitHub-Repos.xlsx | Bin {GitHub => data/GitHub}/UserInfo.xlsx | Bin ...\346\215\256\351\234\200\346\261\202.xlsx" | Bin ...0\346\215\256\346\200\273\346\225\260.csv" | 0 ...6\345\272\246\345\217\230\345\214\226.csv" | 0 ...3\345\272\246\345\217\230\345\214\226.csv" | 0 .../RepoType-OSChina.xlsx | Bin RepoType.xlsx => data/RepoType.xlsx | Bin .../dataset}/cncf-interactive-landscape.csv | 0 .../dataset}/interactive_landscape.csv | 0 .../linuxfoundation-interactive-landscape.csv | 0 {dataset => data/dataset}/repo-list.json | 0 src/main.py | 12 ++++----- src/repoinfo.py | 25 +++++++++--------- 18 files changed, 19 insertions(+), 18 deletions(-) rename {CSDN => data/CSDN}/Processor.jpg (100%) rename {CSDN => data/CSDN}/repo-commit-rank.csv (100%) rename {CSDN => data/CSDN}/repo-csdn-trends.xlsx (100%) rename {CSDN => data/CSDN}/repository_commit_events_20200930_analysis_new-demo.csv (100%) rename GitHub-Repos.xlsx => data/GitHub-Repos.xlsx (100%) rename {GitHub => data/GitHub}/UserInfo.xlsx (100%) rename "OSS\346\225\260\346\215\256\351\234\200\346\261\202.xlsx" => "data/OSS\346\225\260\346\215\256\351\234\200\346\261\202.xlsx" (100%) rename "PingCAP/\350\277\22124\344\270\252\346\234\210\347\232\204\346\264\273\350\267\203\346\225\260\346\215\256\346\200\273\346\225\260.csv" => "data/PingCAP/\350\277\22124\344\270\252\346\234\210\347\232\204\346\264\273\350\267\203\346\225\260\346\215\256\346\200\273\346\225\260.csv" (100%) rename "PingCAP/\351\241\271\347\233\256\345\217\227\346\254\242\350\277\216\345\272\246\345\217\230\345\214\226.csv" => "data/PingCAP/\351\241\271\347\233\256\345\217\227\346\254\242\350\277\216\345\272\246\345\217\230\345\214\226.csv" (100%) rename "PingCAP/\351\241\271\347\233\256\346\264\273\350\267\203\345\272\246\345\217\230\345\214\226.csv" => "data/PingCAP/\351\241\271\347\233\256\346\264\273\350\267\203\345\272\246\345\217\230\345\214\226.csv" (100%) rename RepoType-OSChina.xlsx => data/RepoType-OSChina.xlsx (100%) rename RepoType.xlsx => data/RepoType.xlsx (100%) rename {dataset => data/dataset}/cncf-interactive-landscape.csv (100%) rename {dataset => data/dataset}/interactive_landscape.csv (100%) rename {dataset => data/dataset}/linuxfoundation-interactive-landscape.csv (100%) rename {dataset => data/dataset}/repo-list.json (100%) diff --git a/CSDN/Processor.jpg b/data/CSDN/Processor.jpg similarity index 100% rename from CSDN/Processor.jpg rename to data/CSDN/Processor.jpg diff --git a/CSDN/repo-commit-rank.csv b/data/CSDN/repo-commit-rank.csv similarity index 100% rename from CSDN/repo-commit-rank.csv rename to data/CSDN/repo-commit-rank.csv diff --git a/CSDN/repo-csdn-trends.xlsx b/data/CSDN/repo-csdn-trends.xlsx similarity index 100% rename from CSDN/repo-csdn-trends.xlsx rename to data/CSDN/repo-csdn-trends.xlsx diff --git a/CSDN/repository_commit_events_20200930_analysis_new-demo.csv b/data/CSDN/repository_commit_events_20200930_analysis_new-demo.csv similarity index 100% rename from CSDN/repository_commit_events_20200930_analysis_new-demo.csv rename to data/CSDN/repository_commit_events_20200930_analysis_new-demo.csv diff --git a/GitHub-Repos.xlsx b/data/GitHub-Repos.xlsx similarity index 100% rename from GitHub-Repos.xlsx rename to data/GitHub-Repos.xlsx diff --git a/GitHub/UserInfo.xlsx b/data/GitHub/UserInfo.xlsx similarity index 100% rename from GitHub/UserInfo.xlsx rename to data/GitHub/UserInfo.xlsx diff --git "a/OSS\346\225\260\346\215\256\351\234\200\346\261\202.xlsx" "b/data/OSS\346\225\260\346\215\256\351\234\200\346\261\202.xlsx" similarity index 100% rename from "OSS\346\225\260\346\215\256\351\234\200\346\261\202.xlsx" rename to "data/OSS\346\225\260\346\215\256\351\234\200\346\261\202.xlsx" diff --git "a/PingCAP/\350\277\22124\344\270\252\346\234\210\347\232\204\346\264\273\350\267\203\346\225\260\346\215\256\346\200\273\346\225\260.csv" "b/data/PingCAP/\350\277\22124\344\270\252\346\234\210\347\232\204\346\264\273\350\267\203\346\225\260\346\215\256\346\200\273\346\225\260.csv" similarity index 100% rename from "PingCAP/\350\277\22124\344\270\252\346\234\210\347\232\204\346\264\273\350\267\203\346\225\260\346\215\256\346\200\273\346\225\260.csv" rename to "data/PingCAP/\350\277\22124\344\270\252\346\234\210\347\232\204\346\264\273\350\267\203\346\225\260\346\215\256\346\200\273\346\225\260.csv" diff --git "a/PingCAP/\351\241\271\347\233\256\345\217\227\346\254\242\350\277\216\345\272\246\345\217\230\345\214\226.csv" "b/data/PingCAP/\351\241\271\347\233\256\345\217\227\346\254\242\350\277\216\345\272\246\345\217\230\345\214\226.csv" similarity index 100% rename from "PingCAP/\351\241\271\347\233\256\345\217\227\346\254\242\350\277\216\345\272\246\345\217\230\345\214\226.csv" rename to "data/PingCAP/\351\241\271\347\233\256\345\217\227\346\254\242\350\277\216\345\272\246\345\217\230\345\214\226.csv" diff --git "a/PingCAP/\351\241\271\347\233\256\346\264\273\350\267\203\345\272\246\345\217\230\345\214\226.csv" "b/data/PingCAP/\351\241\271\347\233\256\346\264\273\350\267\203\345\272\246\345\217\230\345\214\226.csv" similarity index 100% rename from "PingCAP/\351\241\271\347\233\256\346\264\273\350\267\203\345\272\246\345\217\230\345\214\226.csv" rename to "data/PingCAP/\351\241\271\347\233\256\346\264\273\350\267\203\345\272\246\345\217\230\345\214\226.csv" diff --git a/RepoType-OSChina.xlsx b/data/RepoType-OSChina.xlsx similarity index 100% rename from RepoType-OSChina.xlsx rename to data/RepoType-OSChina.xlsx diff --git a/RepoType.xlsx b/data/RepoType.xlsx similarity index 100% rename from RepoType.xlsx rename to data/RepoType.xlsx diff --git a/dataset/cncf-interactive-landscape.csv b/data/dataset/cncf-interactive-landscape.csv similarity index 100% rename from dataset/cncf-interactive-landscape.csv rename to data/dataset/cncf-interactive-landscape.csv diff --git a/dataset/interactive_landscape.csv b/data/dataset/interactive_landscape.csv similarity index 100% rename from dataset/interactive_landscape.csv rename to data/dataset/interactive_landscape.csv diff --git a/dataset/linuxfoundation-interactive-landscape.csv b/data/dataset/linuxfoundation-interactive-landscape.csv similarity index 100% rename from dataset/linuxfoundation-interactive-landscape.csv rename to data/dataset/linuxfoundation-interactive-landscape.csv diff --git a/dataset/repo-list.json b/data/dataset/repo-list.json similarity index 100% rename from dataset/repo-list.json rename to data/dataset/repo-list.json diff --git a/src/main.py b/src/main.py index f5ab7ed..c099119 100644 --- a/src/main.py +++ b/src/main.py @@ -23,7 +23,7 @@ config = { }, "schema": { "repo_github_user_info": { - "file": "../GitHub/Userinfo.xlsx", + "file": "../data/GitHub/Userinfo.xlsx", "sheet_name": "repo", "desc": "开源项目Github贡献者信息", "fields": [ @@ -100,7 +100,7 @@ config = { ] }, "repo_github_info": { - "file": "../Github-Repos.xlsx", + "file": "../data/Github-Repos.xlsx", "sheet_name": "汇总", "desc": "开源项目在Github上的项目交互数据", "fields": [ @@ -162,7 +162,7 @@ config = { ] }, "repo_csdn_trends": { - "file": "../CSDN/repo-csdn-trends.xlsx", + "file": "../data/CSDN/repo-csdn-trends.xlsx", "sheet_name": "Sheet1", "desc": "开源项目在CSDN站内指数数据", "fields": [ @@ -179,7 +179,7 @@ config = { ] }, "repo_commit_rank": { - "file": "../CSDN/repo-commit-rank.csv", + "file": "../data/CSDN/repo-commit-rank.csv", "desc": "开源项目在Github的月commit变化", "fields": [ { @@ -205,7 +205,7 @@ config = { ] }, "repo_github_active_trends": { - "file": "../PingCAP/项目活跃度变化.csv", + "file": "../data/PingCAP/项目活跃度变化.csv", "desc": "开源项目在Github上的月活跃度数据", "fields": [ { @@ -241,7 +241,7 @@ config = { ] }, "repo_github_popular_trends": { - "file": "../PingCAP/项目受欢迎度变化.csv", + "file": "../data/PingCAP/项目受欢迎度变化.csv", "desc": "开源项目在Github上的月收欢迎程度数据", "fields": [ { diff --git a/src/repoinfo.py b/src/repoinfo.py index 376f611..efaf7d8 100644 --- a/src/repoinfo.py +++ b/src/repoinfo.py @@ -6,38 +6,39 @@ import time import re # 读取json文件 -with open("dataset/repo-list.json", 'r') as f: +with open("../data/dataset/repo-list.json", 'r') as f: data = json.load(f) -with open('dataset/result.txt', 'r+') as file: +with open('../data/dataset/result.txt', 'r+') as file: file.truncate(0) for list_item in data: # print(list_item["full_name"]) a = list_item["full_name"] - + url = "https://api.github.com/repos/" + a # url2 = "https://api.github.com/repos/" + a + "/contributors?per_page=1&anon=true" - payload={} + payload = {} headers = { - 'Authorization': '' # 这里填入你自己的 GitHub Personal Access Token - } - + 'Authorization': '' # 这里填入你自己的 GitHub Personal Access Token + } + response = requests.request("GET", url, headers=headers, data=payload) # response2 = requests.request("GET", url2, headers=headers, data=payload) - + # headers2 = requests.get(url2).headers # print(headers2) # pagesize = [int(s) for s in re.findall(r'\b\d+\b',headers2['Link'])] - + # print(str(response.json()["id"])+ ',' + response.json()["owner"]["login"] + ',' + response.json()["full_name"] + ',' + str(response.json()["stargazers_count"]) + ',' + str(response.json()["forks_count"]) + ',' + str(response.json()["open_issues"]) + ',' + str(pagesize[-1])) # with open('dataset/result.txt', 'a') as f: # f.write(str(response.json()["id"])+ ',' + response.json()["owner"]["login"] + ',' + response.json()["full_name"] + ',' + str(response.json()["stargazers_count"]) + ',' + str(response.json()["forks_count"]) + ',' + str(pagesize[-1]) + "\n") # time.sleep(4) # Sleep for 2 seconds # print(str(response.json()["id"])+ ',' + response.json()["owner"]["login"] + ',' + response.json()["full_name"] + ',' + str(response.json()["stargazers_count"]) + ',' + str(response.json()["forks_count"]) + ',' + str(response.json()["open_issues"])) - with open('dataset/result.txt', 'a') as f: - f.write(str(response.json()["id"])+ ',' + response.json()["owner"]["login"] + ',' + response.json()["full_name"] + ',' + str(response.json()["stargazers_count"]) + ',' + str(response.json()["forks_count"]) + "\n") - time.sleep(2) # Sleep for 2 seconds \ No newline at end of file + with open('../data/dataset/result.txt', 'a') as f: + f.write(str(response.json()["id"]) + ',' + response.json()["owner"]["login"] + ',' + response.json()[ + "full_name"] + ',' + str(response.json()["stargazers_count"]) + ',' + str(response.json()["forks_count"]) + "\n") + time.sleep(2) # Sleep for 2 seconds -- GitLab