Refactor data dir: move data files under data/ and update the paths in src/

1e93b039 · feilong · 6f0bde03 · 1e93b039 · 1e93b039 · 1e93b039
18 changed files
--- a/CSDN/Processor.jpg
+++ b/CSDN/Processor.jpg
--- a/CSDN/repo-commit-rank.csv
+++ b/CSDN/repo-commit-rank.csv
--- a/CSDN/repo-csdn-trends.xlsx
+++ b/CSDN/repo-csdn-trends.xlsx
--- a/CSDN/repository_commit_events_20200930_analysis_new-demo.csv
+++ b/CSDN/repository_commit_events_20200930_analysis_new-demo.csv
--- a/GitHub-Repos.xlsx
+++ b/GitHub-Repos.xlsx
--- a/GitHub/UserInfo.xlsx
+++ b/GitHub/UserInfo.xlsx
--- a/OSS数据需求.xlsx
+++ b/OSS数据需求.xlsx
--- a/PingCAP/近24个月的活跃数据总数.csv
+++ b/PingCAP/近24个月的活跃数据总数.csv
--- a/PingCAP/项目受欢迎度变化.csv
+++ b/PingCAP/项目受欢迎度变化.csv
--- a/PingCAP/项目活跃度变化.csv
+++ b/PingCAP/项目活跃度变化.csv
--- a/RepoType-OSChina.xlsx
+++ b/RepoType-OSChina.xlsx
--- a/RepoType.xlsx
+++ b/RepoType.xlsx
--- a/dataset/cncf-interactive-landscape.csv
+++ b/dataset/cncf-interactive-landscape.csv
--- a/dataset/interactive_landscape.csv
+++ b/dataset/interactive_landscape.csv
--- a/dataset/linuxfoundation-interactive-landscape.csv
+++ b/dataset/linuxfoundation-interactive-landscape.csv
--- a/dataset/repo-list.json
+++ b/dataset/repo-list.json
--- a/src/main.py
+++ b/src/main.py
@@ -23,7 +23,7 @@ config = {
    },
    "schema": {
        "repo_github_user_info": {
-            "file": "../GitHub/Userinfo.xlsx",
+            "file": "../data/GitHub/Userinfo.xlsx",
            "sheet_name": "repo",
            "desc": "开源项目Github贡献者信息",
            "fields": [
@@ -100,7 +100,7 @@ config = {
            ]
        },
        "repo_github_info": {
-            "file": "../Github-Repos.xlsx",
+            "file": "../data/Github-Repos.xlsx",
            "sheet_name": "汇总",
            "desc": "开源项目在Github上的项目交互数据",
            "fields": [
@@ -162,7 +162,7 @@ config = {
            ]
        },
        "repo_csdn_trends": {
-            "file": "../CSDN/repo-csdn-trends.xlsx",
+            "file": "../data/CSDN/repo-csdn-trends.xlsx",
            "sheet_name": "Sheet1",
            "desc": "开源项目在CSDN站内指数数据",
            "fields": [
@@ -179,7 +179,7 @@ config = {
            ]
        },
        "repo_commit_rank": {
-            "file": "../CSDN/repo-commit-rank.csv",
+            "file": "../data/CSDN/repo-commit-rank.csv",
            "desc": "开源项目在Github的月commit变化",
            "fields": [
                {
@@ -205,7 +205,7 @@ config = {
            ]
        },
        "repo_github_active_trends": {
-            "file": "../PingCAP/项目活跃度变化.csv",
+            "file": "../data/PingCAP/项目活跃度变化.csv",
            "desc": "开源项目在Github上的月活跃度数据",
            "fields": [
                {
@@ -241,7 +241,7 @@ config = {
            ]
        },
        "repo_github_popular_trends": {
-            "file": "../PingCAP/项目受欢迎度变化.csv",
+            "file": "../data/PingCAP/项目受欢迎度变化.csv",
            "desc": "开源项目在Github上的月收欢迎程度数据",
            "fields": [
                {

--- a/src/repoinfo.py
+++ b/src/repoinfo.py
@@ -6,38 +6,39 @@ import time
 import re

 # 读取json文件
-with open("dataset/repo-list.json", 'r') as f:
+with open("../data/dataset/repo-list.json", 'r') as f:
    data = json.load(f)

-with open('dataset/result.txt', 'r+') as file:
+with open('../data/dataset/result.txt', 'r+') as file:
    file.truncate(0)

 for list_item in data:
    # print(list_item["full_name"])
    a = list_item["full_name"]
-    
+
    url = "https://api.github.com/repos/" + a
    # url2 = "https://api.github.com/repos/" + a + "/contributors?per_page=1&anon=true"

-    payload={}
+    payload = {}
    headers = {
-        'Authorization': '' # 这里填入你自己的 GitHub Personal Access Token
-        }
-    
+        'Authorization': ''  # 这里填入你自己的 GitHub Personal Access Token
+    }
+
    response = requests.request("GET", url, headers=headers, data=payload)

    # response2 = requests.request("GET", url2, headers=headers, data=payload)
-    
+
    # headers2 = requests.get(url2).headers
    # print(headers2)
    # pagesize = [int(s) for s in re.findall(r'\b\d+\b',headers2['Link'])]
-    
+
    # print(str(response.json()["id"])+ ',' + response.json()["owner"]["login"] + ',' + response.json()["full_name"] + ',' + str(response.json()["stargazers_count"]) + ',' + str(response.json()["forks_count"]) + ',' + str(response.json()["open_issues"]) + ',' + str(pagesize[-1]))
    # with open('dataset/result.txt', 'a') as f:
    #     f.write(str(response.json()["id"])+ ',' + response.json()["owner"]["login"] + ',' + response.json()["full_name"] + ',' + str(response.json()["stargazers_count"]) + ',' + str(response.json()["forks_count"]) + ',' + str(pagesize[-1]) + "\n")
    # time.sleep(4) # Sleep for 2 seconds

    # print(str(response.json()["id"])+ ',' + response.json()["owner"]["login"] + ',' + response.json()["full_name"] + ',' + str(response.json()["stargazers_count"]) + ',' + str(response.json()["forks_count"]) + ',' + str(response.json()["open_issues"]))
-    with open('dataset/result.txt', 'a') as f:
-        f.write(str(response.json()["id"])+ ',' + response.json()["owner"]["login"] + ',' + response.json()["full_name"] + ',' + str(response.json()["stargazers_count"]) + ',' + str(response.json()["forks_count"]) + "\n")
-    time.sleep(2) # Sleep for 2 seconds
\ No newline at end of file
+    with open('../data/dataset/result.txt', 'a') as f:
+        f.write(str(response.json()["id"]) + ',' + response.json()["owner"]["login"] + ',' + response.json()[
+                "full_name"] + ',' + str(response.json()["stargazers_count"]) + ',' + str(response.json()["forks_count"]) + "\n")
+    time.sleep(2)  # Sleep for 2 seconds