提交 067be799 编写于 作者: F feilong

fix repo country

上级 cddcfc3d
......@@ -2,6 +2,9 @@
import numpy as np
import pandas as pd
def quit():
    """Terminate the running program with exit status 0.

    Calls ``sys.exit(0)``, which raises ``SystemExit``; callers that wrap
    this call in ``try``/``finally`` or catch ``SystemExit`` still get a
    chance to run their cleanup code.

    NOTE(review): this module-level name shadows the interactive ``quit``
    helper that the ``site`` module installs; the name is kept so existing
    callers continue to work.
    """
    # Imported locally so the helper is self-contained and does not depend
    # on the module's top-level import list.
    import sys

    sys.exit(0)
def load_repo_github_info(config, ctx):
schema = config["schema"]
......@@ -225,6 +228,7 @@ def rank_personal_top_n(config, ctx):
# 合并 repo_github_user_info 仓库排行表记录
df = ctx["repo_github_user_commit_info"]
exit_person_count = 0
for index, row in df.iterrows():
email = row["actor_email"]
......@@ -238,6 +242,12 @@ def rank_personal_top_n(config, ctx):
person_key = email.lower()
repo_key = repo_name.lower()
repo_item = repo_top_n_dict.get(repo_key)
person = personal_dict.get(person_key)
if person is not None:
exit_person_count+=1
else:
person = {}
if repo_item is not None:
repo_user_contribute_list = repo_item.get('user_contribute_list')
if repo_user_contribute_list is None:
......@@ -247,9 +257,11 @@ def rank_personal_top_n(config, ctx):
# 项目的用户贡献者列表增加
repo_user_contribute_list.append({
"actor_email": email,
"country": person.get("country", "Null"),
"total": total,
"repo_score": repo_item['score'] # 项目得分
})
print(f"exit_person_count:{exit_person_count}")
# 项目内的用户贡献排序
for repo_key in repo_top_n_dict:
......@@ -258,8 +270,30 @@ def rank_personal_top_n(config, ctx):
if repo_user_contribute_list is not None:
repo_user_contribute_list.sort(
key=lambda x: x["total"], reverse=True)
# 根据项目贡献者的国别信息统计,来决定项目的国别
s = {
"china":0,
"international": 0,
"Null": 0,
"all": len(repo_user_contribute_list)
}
for u in repo_user_contribute_list:
s[u["country"]]+=1
# 如果项目内的成员,在国别上占主导优势(超过50%)
# 就判定该项目为对应的国别
if s["Null"]/s["all"]>0.5:
repo_item["country"] = "Null"
else:
print(f"[warn] missing contributors repo:{repo_key}")
if (s["china"]+s["Null"])/s["all"]>0.5:
repo_item["country"] = "china"
else:
repo_item["country"] = "international"
else:
# print(f"[warn] missing contributors repo:{repo_key}")
pass
# 现在,合并每个项目的用户贡献到一个总表
personal_score_dict = {}
......@@ -285,7 +319,7 @@ def rank_personal_top_n(config, ctx):
if repo_user_contribute_list is None:
continue
repo_region = repo_item['region']
repo_country = repo_item['country']
for repo_person in repo_user_contribute_list:
email = repo_person['actor_email']
......@@ -311,7 +345,7 @@ def rank_personal_top_n(config, ctx):
repo_person["repo_score"]
# en
if repo_region != "国产":
if repo_country == "international":
person_item = personal_score_dict_en.get(person_key)
if person_item is None:
person_item = {
......@@ -329,7 +363,7 @@ def rank_personal_top_n(config, ctx):
else:
person_item['score'] += repo_person["total"] * \
repo_person["repo_score"]
else:
elif repo_country == "china":
# zh_cn
person_item = personal_score_dict_zh_cn.get(person_key)
if person_item is None:
......@@ -348,6 +382,8 @@ def rank_personal_top_n(config, ctx):
else:
person_item['score'] += repo_person["total"] * \
repo_person["repo_score"]
else:
pass
# 个人向全部技术项目贡献排行榜
df = pd.DataFrame.from_dict(personal_score_dict, orient='index')
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册