fix repo country

067be799 · feilong · cddcfc3d · 067be799 · 067be799 · 067be799
4 changed files
--- a/ranks/个人向中国主要开源技术栈贡献榜单.csv
+++ b/ranks/个人向中国主要开源技术栈贡献榜单.csv
--- a/ranks/个人向国际主要开源技术栈贡献榜单.csv
+++ b/ranks/个人向国际主要开源技术栈贡献榜单.csv
--- a/ranks/公司向中国主要开源技术栈贡献榜单.csv
+++ b/ranks/公司向中国主要开源技术栈贡献榜单.csv
--- a/src/tasks/ranks.py
+++ b/src/tasks/ranks.py
@@ -2,6 +2,9 @@
 import numpy as np
 import pandas as pd
+def quit():
+    import sys
+    sys.exit(0)
 def load_repo_github_info(config, ctx):
    schema = config["schema"]
@@ -225,6 +228,7 @@ def rank_personal_top_n(config, ctx):
    # 合并 repo_github_user_info 仓库排行表记录
    df = ctx["repo_github_user_commit_info"]
+    exit_person_count = 0
    for index, row in df.iterrows():
        email = row["actor_email"]
@@ -238,6 +242,12 @@ def rank_personal_top_n(config, ctx):
        person_key = email.lower()
        repo_key = repo_name.lower()
        repo_item = repo_top_n_dict.get(repo_key)
+        person = personal_dict.get(person_key)
+        if person is not None:
+            exit_person_count+=1
+        else:
+            person = {}
        if repo_item is not None:
            repo_user_contribute_list = repo_item.get('user_contribute_list')
            if repo_user_contribute_list is None:
@@ -247,9 +257,11 @@ def rank_personal_top_n(config, ctx):
            # 项目的用户贡献者列表增加
            repo_user_contribute_list.append({
                "actor_email": email,
+                "country": person.get("country", "Null"),
                "total": total,
                "repo_score": repo_item['score']  # 项目得分
            })
+    print(f"exit_person_count:{exit_person_count}")
    # 项目内的用户贡献排序
    for repo_key in repo_top_n_dict:
@@ -258,8 +270,30 @@ def rank_personal_top_n(config, ctx):
        if repo_user_contribute_list is not None:
            repo_user_contribute_list.sort(
                key=lambda x: x["total"], reverse=True)
+            # 根据项目贡献者的国别信息统计，来决定项目的国别
+            s = {
+                "china":0,
+                "international": 0,
+                "Null": 0,
+                "all": len(repo_user_contribute_list)
+            }
+            for u in repo_user_contribute_list:
+                s[u["country"]]+=1
+            # 如果项目内的成员，在国别上占主导优势（超过50%）
+            # 就判定该项目为对应的国别
+            if s["Null"]/s["all"]>0.5:
+                repo_item["country"] = "Null"
+            else:
+                if (s["china"]+s["Null"])/s["all"]>0.5:
+                    repo_item["country"] = "china"
+                else:
+                    repo_item["country"] = "international"
        else:
-            print(f"[warn] missing contributors repo:{repo_key}")
+            # print(f"[warn] missing contributors repo:{repo_key}")
+            pass
    # 现在，合并每个项目的用户贡献到一个总表
    personal_score_dict = {}
@@ -285,7 +319,7 @@ def rank_personal_top_n(config, ctx):
        if repo_user_contribute_list is None:
            continue
-        repo_region = repo_item['region']
+        repo_country = repo_item['country']
        for repo_person in repo_user_contribute_list:
            email = repo_person['actor_email']
@@ -311,7 +345,7 @@ def rank_personal_top_n(config, ctx):
                    repo_person["repo_score"]
            # en
-            if repo_region != "国产":
+            if repo_country == "international":
                person_item = personal_score_dict_en.get(person_key)
                if person_item is None:
                    person_item = {
@@ -329,7 +363,7 @@ def rank_personal_top_n(config, ctx):
                else:
                    person_item['score'] += repo_person["total"] * \
                        repo_person["repo_score"]
-            else:
+            elif repo_country == "china":
                # zh_cn
                person_item = personal_score_dict_zh_cn.get(person_key)
                if person_item is None:
@@ -348,6 +382,8 @@ def rank_personal_top_n(config, ctx):
                else:
                    person_item['score'] += repo_person["total"] * \
                        repo_person["repo_score"]
+            else:
+                pass
    # 个人向全部技术项目贡献排行榜
    df = pd.DataFrame.from_dict(personal_score_dict, orient='index')