Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
CSDN 技术社区
1024 Report
提交
89cef470
1
1024 Report
项目概览
CSDN 技术社区
/
1024 Report
通知
84
Star
6
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
1
1024 Report
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
89cef470
编写于
10月 14, 2022
作者:
F
feilong
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add personal rank
上级
656cf5f7
变更
4
展开全部
隐藏空白更改
内联
并排
Showing
4 changed files
with
193 additions
and
21 deletions
+193
-21
ranks/个人向主要开源技术栈贡献榜单.csv
ranks/个人向主要开源技术栈贡献榜单.csv
+0
-0
ranks/开源项目榜.csv
ranks/开源项目榜.csv
+0
-0
src/config.py
src/config.py
+26
-0
src/tasks/ranks.py
src/tasks/ranks.py
+167
-21
未找到文件。
ranks/个人向主要开源技术栈贡献榜单.csv
0 → 100644
浏览文件 @
89cef470
此差异已折叠。
点击以展开。
ranks/开源项目榜.csv
浏览文件 @
89cef470
此差异已折叠。
点击以展开。
src/config.py
浏览文件 @
89cef470
...
@@ -103,6 +103,32 @@ config = {
...
@@ -103,6 +103,32 @@ config = {
}
}
]
]
},
},
"repo_github_user_commit_info"
:
{
"file"
:
"../data/CSDN/commit_analysis.csv"
,
"desc"
:
"开发者在开源项目的贡献数信息"
,
"fields"
:
[
{
"field_name"
:
"repo_name"
,
"field_type"
:
"str"
,
"desc"
:
"开源项目仓库名字"
},
{
"field_name"
:
"any_commit_id"
,
"field_type"
:
"str"
,
"desc"
:
"开发者在对应开源项目上贡献的任意commit id"
},
{
"field_name"
:
"actor_email"
,
"field_type"
:
"str"
,
"desc"
:
"用户邮箱"
},
{
"field_name"
:
"total"
,
"field_type"
:
"int"
,
"desc"
:
"开发者累计Github项目贡献数"
},
]
},
"repo_github_info"
:
{
"repo_github_info"
:
{
"file"
:
"../data/Github-Repos.xlsx"
,
"file"
:
"../data/Github-Repos.xlsx"
,
"sheet_name"
:
"汇总"
,
"sheet_name"
:
"汇总"
,
...
...
src/tasks/ranks.py
浏览文件 @
89cef470
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
from
audioop
import
reverse
import
os
import
os
import
numpy
as
np
import
numpy
as
np
import
pandas
as
pd
import
pandas
as
pd
from
config
import
config
def load_repo_github_user_info(config, ctx):
    """Read the user-info worksheet and publish it on ``ctx``.

    The workbook path and worksheet name are taken from
    ``config["schema"]["repo_github_user_info"]`` (keys ``"file"`` and
    ``"sheet_name"``). Missing cells are replaced with ``0`` and the
    resulting frame is stored under ``ctx["repo_github_user_info"]``.
    """
    table_spec = config["schema"]["repo_github_user_info"]
    user_frame = pd.read_excel(
        table_spec["file"],
        sheet_name=table_spec["sheet_name"],
    )
    ctx["repo_github_user_info"] = user_frame.fillna(value=0)
def
load_repo_github_info
(
config
,
ctx
):
def
load_repo_github_info
(
config
,
ctx
):
...
@@ -53,6 +44,33 @@ def load_repo_github_popular_trends(config, ctx):
...
@@ -53,6 +44,33 @@ def load_repo_github_popular_trends(config, ctx):
ctx
[
"repo_github_popular_trends"
]
=
df
ctx
[
"repo_github_popular_trends"
]
=
df
def load_repo_github_user_info(config, ctx):
    """Load the GitHub user-info sheet into ``ctx["repo_github_user_info"]``.

    Reads the Excel workbook named by
    ``config["schema"]["repo_github_user_info"]["file"]`` using the sheet
    given by the sibling ``"sheet_name"`` entry, replaces missing cells
    with ``0`` and stores the frame on ``ctx``.
    """
    # NOTE: the original carried a commented-out progress-bar callback
    # here; it was never wired into the read, so it is reduced to this note.
    source = config["schema"]["repo_github_user_info"]
    workbook_path = source["file"]
    sheet = source["sheet_name"]
    loaded = pd.read_excel(workbook_path, sheet_name=sheet)
    ctx["repo_github_user_info"] = loaded.fillna(value=0)
def load_repo_github_user_commit_info(config, ctx):
    """Load the per-user commit CSV into ``ctx["repo_github_user_commit_info"]``.

    The CSV path comes from
    ``config["schema"]["repo_github_user_commit_info"]["file"]``. Missing
    cells are replaced with ``0`` before the frame is stored on ``ctx``.
    """
    csv_path = config["schema"]["repo_github_user_commit_info"]["file"]
    commits = pd.read_csv(csv_path)
    ctx["repo_github_user_commit_info"] = commits.fillna(value=0)
def
rank_repo_top_n
(
config
,
ctx
):
def
rank_repo_top_n
(
config
,
ctx
):
repo_rank
=
[]
repo_rank
=
[]
repo_dict
=
{}
repo_dict
=
{}
...
@@ -136,24 +154,131 @@ def rank_repo_top_n(config, ctx):
...
@@ -136,24 +154,131 @@ def rank_repo_top_n(config, ctx):
df
=
df
.
sort_values
(
by
=
"score"
,
ascending
=
False
)
df
=
df
.
sort_values
(
by
=
"score"
,
ascending
=
False
)
df
.
to_csv
(
config
[
"ranks"
][
"repo"
][
"top_n"
])
df
.
to_csv
(
config
[
"ranks"
][
"repo"
][
"top_n"
])
for
index
,
row
in
df
.
iterrows
():
repo_name
=
index
repo_item
=
repo_dict
[
repo_name
]
repo_item
[
"score"
]
=
row
[
"score"
]
ctx
[
"ranks"
][
"repo_top_n"
]
=
df
ctx
[
"ranks"
][
"repo_top_n_dict"
]
=
repo_dict
# Profile columns copied from the user-info table onto each ranked person.
_PERSON_PROFILE_FIELDS = (
    "avatar_url",
    "name",
    "company",
    "location",
    "followers",
    "author_id",
    "type",
    "login",
)


def _collect_person_profiles(user_df):
    """Index the user-info rows by lower-cased ``actor_email``."""
    profiles = {}
    for _, row in user_df.iterrows():
        email = row["actor_email"]
        # Ignore bad rows: a non-string e-mail (for example a 0 left by an
        # upstream fillna) cannot be used as a key.
        if not isinstance(email, str):
            continue
        # Basic profile information.
        profile = {field: row[field] for field in _PERSON_PROFILE_FIELDS}
        # Aggregate statistic carried by the user-info table.
        profile["all_repo_contribute_total"] = int(row["sum_total"])
        profiles[email.lower()] = profile
    return profiles


def _attach_repo_contributions(commit_df, repo_top_n_dict):
    """Append each commit row to its ranked repo's ``user_contribute_list``."""
    for _, row in commit_df.iterrows():
        email = row["actor_email"]
        repo_name = row["repo_name"]
        # Ignore bad rows: both keys must be strings before lower-casing.
        if not isinstance(email, str) or not isinstance(repo_name, str):
            continue
        repo_item = repo_top_n_dict.get(repo_name.lower())
        if repo_item is None:
            # The repository is not part of the repo ranking.
            continue
        repo_item.setdefault("user_contribute_list", []).append({
            "actor_email": email,
            "total": row["total"],
            "repo_score": repo_item["score"],  # score of the repository
        })


def _accumulate_personal_scores(repo_top_n_dict, profiles):
    """Sum ``total * repo_score`` per person across all ranked repos."""
    scores = {}
    for repo_item in repo_top_n_dict.values():
        for contribution in repo_item.get("user_contribute_list") or ():
            person_key = contribution["actor_email"].lower()
            weighted = contribution["total"] * contribution["repo_score"]
            entry = scores.get(person_key)
            if entry is not None:
                entry["score"] += weighted
                continue
            entry = {"score": weighted}
            scores[person_key] = entry
            profile = profiles.get(person_key)
            # No matching profile: the entry keeps its score but gets no
            # profile columns.
            if profile is None:
                continue
            for field in _PERSON_PROFILE_FIELDS:
                entry[field] = profile[field]
    return scores


def rank_personal_top_n(config, ctx):
    """Build the personal contribution ranking and write it as CSV.

    Reads ``ctx["ranks"]["repo_top_n_dict"]``,
    ``ctx["repo_github_user_info"]`` and
    ``ctx["repo_github_user_commit_info"]``. A person's score is the sum,
    over ranked repositories, of their contribution ``total`` multiplied by
    that repository's ``score``. The table, sorted by score in descending
    order, is written to ``config["ranks"]["personal"]["top_n"]`` and its
    head is printed.

    Side effect: every repo entry in ``repo_top_n_dict`` that has
    contributors gains a ``"user_contribute_list"`` sorted by ``total``.
    """
    repo_top_n_dict = ctx["ranks"]["repo_top_n_dict"]

    # Index the user-info table; the key is the lower-cased actor_email.
    personal_dict = _collect_person_profiles(ctx["repo_github_user_info"])

    # Merge the per-user commit records into the ranked repositories.
    _attach_repo_contributions(
        ctx["repo_github_user_commit_info"], repo_top_n_dict
    )

    # Sort the contributors inside each repository.
    for repo_key, repo_item in repo_top_n_dict.items():
        contributions = repo_item.get("user_contribute_list")
        if contributions is None:
            print(f"[warn] missing contributors repo: {repo_key}")
        else:
            contributions.sort(key=lambda x: x["total"], reverse=True)

    # Merge every repository's contributions into one per-person table.
    personal_score_dict = _accumulate_personal_scores(
        repo_top_n_dict, personal_dict
    )

    # Ranking. sort_values returns a new frame, so the result must be
    # re-bound; otherwise the CSV is written in insertion order.
    df = pd.DataFrame.from_dict(personal_score_dict, orient="index")
    if "score" in df.columns:  # absent when nobody contributed
        df = df.sort_values(by="score", ascending=False)
    df.to_csv(config["ranks"]["personal"]["top_n"])
    print(df.head())
def
calc_ranks
(
config
,
options
):
ctx
=
{}
# print("@load_repo_github_user_info..")
def
rank_company_top_n
(
config
,
ctx
):
# load_repo_github_user_info(config, ctx)
pass
# print("@load_repo_commit_rank..")
# load_repo_commit_rank(config, ctx)
def
calc_repo_rank
(
config
,
options
,
ctx
):
print
(
"@load_repo_github_info.."
)
print
(
"@load_repo_github_info.."
)
load_repo_github_info
(
config
,
ctx
)
load_repo_github_info
(
config
,
ctx
)
...
@@ -169,8 +294,29 @@ def calc_ranks(config, options):
...
@@ -169,8 +294,29 @@ def calc_ranks(config, options):
print
(
"@rank_repo_top_n.."
)
print
(
"@rank_repo_top_n.."
)
rank_repo_top_n
(
config
,
ctx
)
rank_repo_top_n
(
config
,
ctx
)
# print("@rank_personal_top_n..")
# rank_personal_top_n(config, ctx)
def calc_personal_rank(config, options, ctx):
    """Run the personal-ranking pipeline stage by stage.

    Each stage prints its banner and is then called with ``(config, ctx)``:
    the user-info loader, the user-commit loader, and finally the personal
    ranking itself. ``options`` is accepted but not used here.
    """
    stages = (
        ("@load_repo_github_user_info..", load_repo_github_user_info),
        ("@load_repo_github_user_commit_info..", load_repo_github_user_commit_info),
        ("@rank_personal_top_n..", rank_personal_top_n),
    )
    for banner, stage in stages:
        print(banner)
        stage(config, ctx)
def calc_company_rank(config, options, ctx):
    """Placeholder for the company ranking; currently does nothing.

    All three arguments are accepted and ignored.
    """
    # Disabled until the company ranking is implemented:
    # print("@rank_company_top_n..")
    # rank_company_top_n(config, ctx)
    return None
def calc_ranks(config, options):
    """Entry point: compute the repo ranking, then the personal ranking.

    A fresh context holding an empty ``"ranks"`` dict is created and shared
    by both stages, so the personal stage can read what the repo stage
    stored there.
    """
    ctx = {"ranks": {}}
    # The company ranking (calc_company_rank) is not enabled yet.
    for stage in (calc_repo_rank, calc_personal_rank):
        stage(config, options, ctx)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录