Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
CSDN 技术社区
1024 Report
提交
41053a6f
1
1024 Report
项目概览
CSDN 技术社区
/
1024 Report
通知
84
Star
6
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
1
1024 Report
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
41053a6f
编写于
10月 13, 2022
作者:
F
feilong
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
init repo rank calc
上级
1ca50867
变更
3
展开全部
显示空白变更内容
内联
并排
Showing
3 changed file
with
154 addition
and
23 deletion
+154
-23
GitHub-Repos.xlsx
GitHub-Repos.xlsx
+0
-0
ranks/开源项目榜.csv
ranks/开源项目榜.csv
+0
-0
src/main.py
src/main.py
+154
-23
未找到文件。
GitHub-Repos.xlsx
浏览文件 @
41053a6f
无法预览此类型文件
ranks/开源项目榜.csv
0 → 100644
浏览文件 @
41053a6f
此差异已折叠。
点击以展开。
src/main.py
浏览文件 @
41053a6f
import
os
import
os
import
numpy
as
np
import
pandas
as
pd
import
pandas
as
pd
config
=
{
config
=
{
"ranks"
:
{
"ranks"
:
{
"personal"
:
{
"personal"
:
{
"top_n"
:
"../rank/个人向主要开源技术栈贡献榜单.csv"
,
"top_n"
:
"../rank
s
/个人向主要开源技术栈贡献榜单.csv"
,
"top_n_en"
:
"../rank/个人向国际主要开源技术栈贡献榜单.csv"
,
"top_n_en"
:
"../rank
s
/个人向国际主要开源技术栈贡献榜单.csv"
,
"top_n_zh_cn"
:
"../rank/个人向中国主要开源技术栈贡献榜单.csv"
,
"top_n_zh_cn"
:
"../rank
s
/个人向中国主要开源技术栈贡献榜单.csv"
,
},
},
"company"
:
{
"company"
:
{
"top_n"
:
"../rank/公司向主要开源技术栈贡献榜单.csv"
,
"top_n"
:
"../rank
s
/公司向主要开源技术栈贡献榜单.csv"
,
"top_n_en"
:
"../rank/公司向国际主要开源技术栈贡献榜单.csv"
,
"top_n_en"
:
"../rank
s
/公司向国际主要开源技术栈贡献榜单.csv"
,
"top_n_zh_cn"
:
"../rank/公司向中国主要开源技术栈贡献榜单.csv"
,
"top_n_zh_cn"
:
"../rank
s
/公司向中国主要开源技术栈贡献榜单.csv"
,
},
},
"repo"
:
{
"repo"
:
{
"top_n"
:
"../rank/开源项目榜.csv"
,
"top_n"
:
"../rank
s
/开源项目榜.csv"
,
"top_n_en"
:
"../rank/开源项目榜_非中国项目.csv"
,
"top_n_en"
:
"../rank
s
/开源项目榜_非中国项目.csv"
,
"top_n_zh_cn"
:
"../rank/开源项目榜_中国项目.csv"
"top_n_zh_cn"
:
"../rank
s
/开源项目榜_中国项目.csv"
}
}
},
},
"schema"
:
{
"schema"
:
{
...
@@ -176,6 +177,32 @@ config = {
...
@@ -176,6 +177,32 @@ config = {
}
}
]
]
},
},
"repo_commit_rank"
:
{
"file"
:
"../CSDN/repo-commit-rank.csv"
,
"desc"
:
"开源项目在Github的月commit变化"
,
"fields"
:
[
{
"field_name"
:
"actor_email"
,
"field_type"
:
"str"
,
"desc"
:
"用户邮箱"
},
{
"field_name"
:
"sum_total"
,
"field_type"
:
"int"
,
"desc"
:
"用户累计Github项目贡献数"
},
{
"feild_name"
:
"any_repo_path"
,
"field_type"
:
"str"
,
"desc"
:
"用户贡献过的任意一个Github仓库路径"
},
{
"field_name"
:
"any_commit_id"
,
"field_type"
:
"str"
,
"desc"
:
"用户在上述贡献过的Github仓库里的任意一个commit"
}
]
},
"repo_github_active_trends"
:
{
"repo_github_active_trends"
:
{
"file"
:
"../PingCAP/项目活跃度变化.csv"
,
"file"
:
"../PingCAP/项目活跃度变化.csv"
,
"desc"
:
"开源项目在Github上的月活跃度数据"
,
"desc"
:
"开源项目在Github上的月活跃度数据"
,
...
@@ -257,7 +284,7 @@ def load_repo_github_user_info(config, ctx):
...
@@ -257,7 +284,7 @@ def load_repo_github_user_info(config, ctx):
df
=
pd
.
read_excel
(
df
=
pd
.
read_excel
(
schema
[
"repo_github_user_info"
][
"file"
],
schema
[
"repo_github_user_info"
][
"file"
],
sheet_name
=
schema
[
"repo_github_user_info"
][
"sheet_name"
])
sheet_name
=
schema
[
"repo_github_user_info"
][
"sheet_name"
])
print
(
df
.
head
()
)
df
.
fillna
(
value
=
0
,
inplace
=
True
)
ctx
[
"repo_github_user_info"
]
=
df
ctx
[
"repo_github_user_info"
]
=
df
...
@@ -266,7 +293,7 @@ def load_repo_github_info(config, ctx):
...
@@ -266,7 +293,7 @@ def load_repo_github_info(config, ctx):
df
=
pd
.
read_excel
(
df
=
pd
.
read_excel
(
schema
[
"repo_github_info"
][
"file"
],
schema
[
"repo_github_info"
][
"file"
],
sheet_name
=
schema
[
"repo_github_info"
][
"sheet_name"
])
sheet_name
=
schema
[
"repo_github_info"
][
"sheet_name"
])
print
(
df
.
head
()
)
df
.
fillna
(
value
=
0
,
inplace
=
True
)
ctx
[
"repo_github_info"
]
=
df
ctx
[
"repo_github_info"
]
=
df
...
@@ -274,50 +301,154 @@ def load_repo_csdn_trends(config, ctx):
...
@@ -274,50 +301,154 @@ def load_repo_csdn_trends(config, ctx):
schema
=
config
[
"schema"
]
schema
=
config
[
"schema"
]
df
=
pd
.
read_excel
(
df
=
pd
.
read_excel
(
schema
[
"repo_csdn_trends"
][
"file"
],
schema
[
"repo_csdn_trends"
][
"file"
],
sheet_name
=
schema
[
"repo_
github_info
"
][
"sheet_name"
])
sheet_name
=
schema
[
"repo_
csdn_trends
"
][
"sheet_name"
])
print
(
df
.
head
()
)
df
.
fillna
(
value
=
0
,
inplace
=
True
)
ctx
[
"repo_csdn_trends"
]
=
df
ctx
[
"repo_csdn_trends"
]
=
df
def
load_repo_commit_rank
(
config
,
ctx
):
def
load_repo_commit_rank
(
config
,
ctx
):
schema
=
config
[
"schema"
]
schema
=
config
[
"schema"
]
df
=
pd
.
read_csv
(
schema
[
"repo_commit_rank"
][
"file"
])
df
=
pd
.
read_csv
(
schema
[
"repo_commit_rank"
][
"file"
])
print
(
df
.
columns
)
df
.
fillna
(
value
=
0
,
inplace
=
True
)
ctx
[
"repo_commit_rank"
]
=
df
ctx
[
"repo_commit_rank"
]
=
df
def
load_repo_github_active_trends
(
config
,
ctx
):
def
load_repo_github_active_trends
(
config
,
ctx
):
schema
=
config
[
"schema"
]
schema
=
config
[
"schema"
]
df
=
pd
.
read_csv
(
schema
[
"repo_github_active_trends"
][
"file"
])
df
=
pd
.
read_csv
(
schema
[
"repo_github_active_trends"
][
"file"
])
print
(
df
.
columns
)
df
.
fillna
(
value
=
0
,
inplace
=
True
)
ctx
[
"repo_github_active_trends"
]
=
df
ctx
[
"repo_github_active_trends"
]
=
df
def
rank_personal_top_n
(
config
,
ctx
):
def
load_repo_github_popular_trends
(
config
,
ctx
):
pass
schema
=
config
[
"schema"
]
df
=
pd
.
read_csv
(
schema
[
"repo_github_popular_trends"
][
"file"
])
df
.
fillna
(
value
=
0
,
inplace
=
True
)
ctx
[
"repo_github_popular_trends"
]
=
df
def
rank_company_top_n
(
config
,
ctx
):
def
rank_repo_top_n
(
config
,
ctx
):
repo_rank
=
[]
repo_dict
=
{}
# 合并项目的总数据 repo_github_info 主键 FullName
df
=
ctx
[
"repo_github_info"
]
df
.
fillna
(
value
=
0
)
for
index
,
row
in
df
.
iterrows
():
repo_item
=
{}
repo_key
=
row
[
"FullName"
].
lower
()
repo_item
[
"region"
]
=
row
[
"Region"
]
repo_item
[
"star"
]
=
int
(
row
[
"Star"
])
repo_item
[
"fork"
]
=
int
(
row
[
"Fork"
])
repo_item
[
"contributors"
]
=
int
(
row
[
"Contributors"
])
repo_dict
[
repo_key
]
=
repo_item
repo_rank
.
append
(
repo_item
)
# 合并项目的CSDN指数数据 repo_csdn_trends 主键 repo_name
df
=
ctx
[
"repo_csdn_trends"
]
for
index
,
row
in
df
.
iterrows
():
repo_name
=
row
[
'repo_name'
]
repo_key
=
repo_name
.
lower
()
repo_item
=
repo_dict
.
get
(
repo_key
)
repo_item
[
"csdn_index_month_avg"
]
=
row
[
1
:].
mean
()
# 合并项目的活跃变动数据 repo_github_active_trends 主键 repo_name
df
=
ctx
[
"repo_github_active_trends"
]
df
.
fillna
(
value
=
0
)
df
=
df
.
groupby
([
"repo_name"
]).
agg
(
np
.
mean
)
# print(df.loc['TheAlgorithms/Python'])
for
index
,
row
in
df
.
iterrows
():
repo_name
=
index
repo_key
=
repo_name
.
lower
()
repo_item
=
repo_dict
.
get
(
repo_key
)
repo_item
[
"push_count_month_avg"
]
=
row
[
"push_count"
]
repo_item
[
"pr_count_month_avg"
]
=
row
[
"pr_count"
]
repo_item
[
"issue_count_month_avg"
]
=
row
[
"issue_count"
]
repo_item
[
"creator_count_month_avg"
]
=
row
[
"creator_count"
]
# 合并项目的激活变动数据 repo_github_popular_trends 主键 repo_name
df
=
ctx
[
"repo_github_popular_trends"
]
df
=
df
.
groupby
([
"repo_name"
]).
agg
(
np
.
mean
)
for
index
,
row
in
df
.
iterrows
():
repo_name
=
index
repo_key
=
repo_name
.
lower
()
repo_item
=
repo_dict
.
get
(
repo_key
)
repo_item
[
"watch_count_month_avg"
]
=
row
[
"watch_count"
]
repo_item
[
"fork_count_month_avg"
]
=
row
[
"fork_count"
]
# 合并表
df
=
pd
.
DataFrame
.
from_dict
(
repo_dict
,
orient
=
'index'
)
df
.
fillna
(
value
=
0
,
inplace
=
True
)
df
.
reset_index
()
# 计算榜单得分
weights
=
{
"star"
:
1
,
"fork"
:
1
,
"contributors"
:
1
,
"csdn_index_month_avg"
:
1
,
"push_count_month_avg"
:
1
,
"pr_count_month_avg"
:
1
,
"issue_count_month_avg"
:
1
,
"creator_count_month_avg"
:
1
,
"watch_count_month_avg"
:
1
,
"fork_count_month_avg"
:
1
}
total_weight_value
=
0
for
key
in
weights
:
total_weight_value
+=
weights
[
key
]
for
key
in
weights
:
weights
[
key
]
=
weights
[
key
]
/
total_weight_value
df
[
"score"
]
=
0
for
key
in
weights
:
df
[
"score"
]
+=
df
[
key
].
apply
(
lambda
x
:
x
*
weights
[
key
])
print
(
df
.
head
())
df
=
df
.
sort_values
(
by
=
"score"
,
ascending
=
False
)
df
.
to_csv
(
config
[
"ranks"
][
"repo"
][
"top_n"
])
def
rank_personal_top_n
(
config
,
ctx
):
pass
pass
def
rank_
repo
_top_n
(
config
,
ctx
):
def
rank_
company
_top_n
(
config
,
ctx
):
pass
pass
def
main
(
config
):
def
main
(
config
):
ctx
=
{}
ctx
=
{}
load_repo_github_user_info
(
config
,
ctx
)
# print("@load_repo_github_user_info..")
# load_repo_github_user_info(config, ctx)
# print("@load_repo_commit_rank..")
# load_repo_commit_rank(config, ctx)
print
(
"@load_repo_github_info.."
)
load_repo_github_info
(
config
,
ctx
)
load_repo_github_info
(
config
,
ctx
)
print
(
"@load_repo_csdn_trends.."
)
load_repo_csdn_trends
(
config
,
ctx
)
load_repo_csdn_trends
(
config
,
ctx
)
load_repo_commit_rank
(
config
,
ctx
)
print
(
"@load_repo_github_active_trends.."
)
load_repo_github_active_trends
(
config
,
ctx
)
load_repo_github_active_trends
(
config
,
ctx
)
rank_personal_top_n
(
config
,
ctx
)
print
(
"@load_repo_github_popular_trends.."
)
rank_company_top_n
(
config
,
ctx
)
load_repo_github_popular_trends
(
config
,
ctx
)
print
(
"@rank_repo_top_n.."
)
rank_repo_top_n
(
config
,
ctx
)
rank_repo_top_n
(
config
,
ctx
)
# print("@rank_personal_top_n..")
# rank_personal_top_n(config, ctx)
# print("@rank_company_top_n..")
# rank_company_top_n(config, ctx)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
main
(
config
)
main
(
config
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录