提交 55951f0f 编写于 作者: F feilong

refactor codes

上级 6aef16d8
.DS_Store .DS_Store
*.txt *.txt
~$*.xlsx ~$*.xlsx
\ No newline at end of file __pycache__
!requirement.txt
\ No newline at end of file
...@@ -15,6 +15,13 @@ ...@@ -15,6 +15,13 @@
* `ranks/` 目录下是榜单数据输出目录 * `ranks/` 目录下是榜单数据输出目录
* `src/` 是数据获取和榜单计算源代码目录,其中榜单计算代码是:`src/main.py` * `src/` 是数据获取和榜单计算源代码目录,其中榜单计算代码是:`src/main.py`
## 项目配置和运行
1. python 3 环境
2. cd src/
3. `pip install -r requirements.txt`
4. `python main.py -a tasks.ranks`
## 数据源 ## 数据源
1. **Apache 基金会项目** ,GitHub Apache 组织下的所有项目,通过 GitHub API 获取,每个 Json 文件含 100 个项目; 1. **Apache 基金会项目** ,GitHub Apache 组织下的所有项目,通过 GitHub API 获取,每个 Json 文件含 100 个项目;
......
# -*- coding: UTF-8 -*-
'''
命令行分发路由
'''
def dispatch(actions, targets):
    """Route a list of command-line action segments to a handler and run it.

    ``targets`` is a nested routing table: each key is an action segment and
    each value is either another dict (a deeper routing level) or a
    zero-argument callable (the handler).  A dict may also carry a ``'run'``
    entry, which is the handler used when the path ends on that dict.

    Args:
        actions: Action segments, e.g. ``["tasks", "ranks"]``.
        targets: The routing table described above.

    Returns:
        None.  Progress and errors are reported with ``print``.
    """
    trace = '->'.join(actions)

    # With fewer than two segments no routing is attempted: only a top-level
    # 'run' handler can serve the request.
    if len(actions) < 2:
        handler = targets.get('run')
        if handler is not None:
            print("[命令路由执行]:", trace)
            handler()
        else:
            print('action not found')
        return

    node = targets
    for action in actions:
        if not isinstance(node, dict):
            # A handler was reached before the path was exhausted; the
            # remaining segments are ignored and the handler runs.
            break
        child = node.get(action)
        if child is None:
            # Stop at the first unknown segment.  Continuing to match later
            # segments against the same node could execute an unrelated
            # handler for a mistyped path.
            print("[命令路由错误]: 未找到支持的命令行路由:",
                  trace, ", obj:", node)
            return
        print(f"[命令路由中..]: {action}")
        node = child

    if isinstance(node, dict):
        # The path ended on a routing level: fall back to its 'run' handler.
        node = node.get('run')
        if node is None:
            print('not found')
            return

    print("[命令路由执行]:", trace)
    node()
# Project configuration.
#   "ranks"  - output CSV path for each ranking, grouped by ranking subject.
#   "schema" - one entry per input data set: where the file lives, an optional
#              sheet name for Excel sources, and a description of its columns.
#              Every column descriptor uses the keys field_name / field_type /
#              desc.
config = {
    "ranks": {
        "personal": {
            "top_n": "../ranks/个人向主要开源技术栈贡献榜单.csv",
            "top_n_en": "../ranks/个人向国际主要开源技术栈贡献榜单.csv",
            "top_n_zh_cn": "../ranks/个人向中国主要开源技术栈贡献榜单.csv",
        },
        "company": {
            "top_n": "../ranks/公司向主要开源技术栈贡献榜单.csv",
            "top_n_en": "../ranks/公司向国际主要开源技术栈贡献榜单.csv",
            "top_n_zh_cn": "../ranks/公司向中国主要开源技术栈贡献榜单.csv",
        },
        "repo": {
            "top_n": "../ranks/开源项目榜.csv",
            "top_n_en": "../ranks/开源项目榜_非中国项目.csv",
            "top_n_zh_cn": "../ranks/开源项目榜_中国项目.csv"
        }
    },
    "schema": {
        "repo_github_user_info": {
            "file": "../data/GitHub/Userinfo.xlsx",
            "sheet_name": "repo",
            "desc": "开源项目Github贡献者信息",
            "fields": [
                {
                    "field_name": "actor_email",
                    "field_type": "str",
                    "desc": "用户邮箱"
                },
                {
                    "field_name": "sum_total",
                    "field_type": "int",
                    "desc": "用户累计Github项目贡献数"
                },
                {
                    "field_name": "any_repo_path",
                    "field_type": "str",
                    "desc": "用户贡献过的任意一个Github仓库路径"
                },
                {
                    "field_name": "any_commit_id",
                    "field_type": "str",
                    "desc": "用户在上述贡献过的Github仓库里的任意一个commit"
                },
                {
                    "field_name": "avatar_url",
                    "field_type": "str",
                    "desc": "用户头像"
                },
                {
                    "field_name": "name",
                    "field_type": "str",
                    "desc": "用户昵称"
                },
                {
                    "field_name": "company",
                    "field_type": "str",
                    "desc": "用户在Github上填写的公司名称"
                },
                {
                    "field_name": "location",
                    "field_type": "str",
                    "desc": "用户在Github上填写的城市信息"
                },
                {
                    "field_name": "followers",
                    "field_type": "int",
                    "desc": "用户的被关注数"
                },
                {
                    "field_name": "author_id",
                    "field_type": "int",
                    "desc": "用户Github的ID"
                },
                {
                    "field_name": "type",
                    "field_type": "str",
                    "desc": "账号类型,人类或机器人"
                },
                {
                    "field_name": "login",
                    "field_type": "str",
                    "desc": "登录名"
                },
                {
                    "field_name": "created_at",
                    "field_type": "date",
                    "desc": "创建时间"
                },
                {
                    "field_name": "updated_at",
                    "field_type": "date",
                    "desc": "更新时间"
                }
            ]
        },
        "repo_github_info": {
            "file": "../data/Github-Repos.xlsx",
            "sheet_name": "汇总",
            "desc": "开源项目在Github上的项目交互数据",
            "fields": [
                {
                    "field_name": "ID",
                    "field_type": "int",
                    "desc": "编号"
                },
                {
                    "field_name": "RepoID",
                    "field_type": "int",
                    "desc": "开源项目GithubID"
                },
                {
                    "field_name": "Org",
                    "field_type": "str",
                    "desc": "开源项目组织"
                },
                {
                    "field_name": "FullName",
                    "field_type": "str",
                    "desc": "开源项目全名"
                },
                {
                    "field_name": "Url",
                    "field_type": "str",
                    "desc": "开源项目url"
                },
                {
                    "field_name": "Repo",
                    "field_type": "str",
                    "desc": "仓库名"
                },
                {
                    "field_name": "Star",
                    "desc": "开源项目的Github Star 总数",
                    "field_type": "str"
                },
                {
                    "field_name": "Fork",
                    "desc": "开源项目的Github Fork 总数",
                    "field_type": "str"
                },
                {
                    "field_name": "Contributors",
                    "desc": "开源项目的Github 贡献者 总数",
                    "field_type": "str"
                },
                {
                    "field_name": "OSC-URL",
                    "desc": "开源项目的Github CodeChina URL",
                    "field_type": "str"
                },
                {
                    "field_name": "Region",
                    "desc": "开源项目所属的区域",
                    "field_type": "str"
                }
            ]
        },
        "repo_csdn_trends": {
            "file": "../data/CSDN/repo-csdn-trends.xlsx",
            "sheet_name": "Sheet1",
            "desc": "开源项目在CSDN站内指数数据",
            "fields": [
                {
                    "field_name": "repo_name",
                    "field_type": "str",
                    "desc": "开源项目名称"
                },
                {
                    "field_name": "2020-10-01 00:00:00",
                    "field_type": "int",
                    "desc": "月份第一天时间"
                }
            ]
        },
        "repo_commit_rank": {
            "file": "../data/CSDN/repo-commit-rank.csv",
            "desc": "开源项目在Github的月commit变化",
            "fields": [
                {
                    "field_name": "actor_email",
                    "field_type": "str",
                    "desc": "用户邮箱"
                },
                {
                    "field_name": "sum_total",
                    "field_type": "int",
                    "desc": "用户累计Github项目贡献数"
                },
                {
                    "field_name": "any_repo_path",
                    "field_type": "str",
                    "desc": "用户贡献过的任意一个Github仓库路径"
                },
                {
                    "field_name": "any_commit_id",
                    "field_type": "str",
                    "desc": "用户在上述贡献过的Github仓库里的任意一个commit"
                }
            ]
        },
        "repo_github_active_trends": {
            "file": "../data/PingCAP/项目活跃度变化.csv",
            "desc": "开源项目在Github上的月活跃度数据",
            "fields": [
                {
                    "field_name": "event_month",
                    "field_type": "date",
                    "desc": "交互数据汇总月份"
                },
                {
                    "field_name": "repo_name",
                    "field_type": "str",
                    "desc": "开源项目名称"
                },
                {
                    "field_name": "push_count",
                    "field_type": "int",
                    "desc": "开源项目当月 git push 次数"
                },
                {
                    "field_name": "pr_count",
                    "field_type": "int",
                    "desc": "开源项目当月 git pr 次数"
                },
                {
                    "field_name": "issue_count",
                    "field_type": "int",
                    "desc": "开源项目当月 git issue 个数"
                },
                {
                    "field_name": "creator_count",
                    "field_type": "int",
                    "desc": "开源项目当月 git操作 创建者总数"
                }
            ]
        },
        # NOTE(review): the field list below is identical to the one for
        # repo_github_active_trends, yet rank_repo_top_n reads the columns
        # "watch_count" and "fork_count" from this data set - the descriptors
        # look copy-pasted; confirm against the CSV header and update.
        "repo_github_popular_trends": {
            "file": "../data/PingCAP/项目受欢迎度变化.csv",
            "desc": "开源项目在Github上的月收欢迎程度数据",
            "fields": [
                {
                    "field_name": "event_month",
                    "field_type": "date",
                    "desc": "交互数据汇总月份"
                },
                {
                    "field_name": "repo_name",
                    "field_type": "str",
                    "desc": "开源项目名称"
                },
                {
                    "field_name": "push_count",
                    "field_type": "int",
                    "desc": "开源项目当月 git push 次数"
                },
                {
                    "field_name": "pr_count",
                    "field_type": "int",
                    "desc": "开源项目当月 git pr 次数"
                },
                {
                    "field_name": "issue_count",
                    "field_type": "int",
                    "desc": "开源项目当月 git issue 个数"
                },
                {
                    "field_name": "creator_count",
                    "field_type": "int",
                    "desc": "开源项目当月 git操作 创建者总数"
                }
            ]
        }
    },
}
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import os from common.cli import dispatch
import numpy as np from config import config
import pandas as pd from tasks.ranks import calc_ranks
from optparse import OptionParser
config = {
"ranks": {
"personal": { def parse_options():
"top_n": "../ranks/个人向主要开源技术栈贡献榜单.csv", parser = OptionParser()
"top_n_en": "../ranks/个人向国际主要开源技术栈贡献榜单.csv", parser.add_option(
"top_n_zh_cn": "../ranks/个人向中国主要开源技术栈贡献榜单.csv", "-a", "--action",
}, dest="action",
"company": { help="action",
"top_n": "../ranks/公司向主要开源技术栈贡献榜单.csv", metavar="ACTION"
"top_n_en": "../ranks/公司向国际主要开源技术栈贡献榜单.csv", )
"top_n_zh_cn": "../ranks/公司向中国主要开源技术栈贡献榜单.csv", (options, args) = parser.parse_args()
}, return [options, args]
"repo": {
"top_n": "../ranks/开源项目榜.csv",
"top_n_en": "../ranks/开源项目榜_非中国项目.csv", def show_help():
"top_n_zh_cn": "../ranks/开源项目榜_中国项目.csv" print("请指定任务类型,用例:")
} print("python main.py -a tasks.ranks")
},
"schema": {
"repo_github_user_info": { def main(config, options, actions):
"file": "../data/GitHub/Userinfo.xlsx", # 配置任务路由
"sheet_name": "repo", dispatch(actions, {
"desc": "开源项目Github贡献者信息", # 请在此添加其他任务路由
"fields": [ "tasks": {
{ "ranks": lambda: calc_ranks(config, options)
"field_name": "actor_email",
"field_type": "str",
"desc": "用户邮箱"
},
{
"field_name": "sum_total",
"field_type": "int",
"desc": "用户累计Github项目贡献数"
},
{
"feild_name": "any_repo_path",
"field_type": "str",
"desc": "用户贡献过的任意一个Github仓库路径"
},
{
"field_name": "any_commit_id",
"field_type": "str",
"desc": "用户在上述贡献过的Github仓库里的任意一个commit"
},
{
"field_name": "avatar_url",
"field_type": "str",
"desc": "用户头像"
},
{
"field_name": "name",
"field_type": "str",
"desc": "用户昵称"
},
{
"field_name": "company",
"field_type": "用户在Github上填写的公司名称",
"desc": ""
},
{
"field_name": "location",
"field_type": "str",
"desc": "用户在Github上填写的城市信息"
},
{
"field_name": "followers",
"field_type": "int",
"desc": "用户的被关注数"
},
{
"field_name": "author_id",
"field_type": "int",
"desc": "用户Github的ID"
},
{
"field_name": "type",
"field_type": "str",
"desc": "账号类型,人类或机器人"
},
{
"field_name": "login",
"field_type": "str",
"desc": "登录名"
},
{
"field_name": "created_at",
"field_type": "date",
"desc": "创建时间"
},
{
"field_name": "updated_at",
"field_type": "date",
"desc": "更新时间"
}
]
},
"repo_github_info": {
"file": "../data/Github-Repos.xlsx",
"sheet_name": "汇总",
"desc": "开源项目在Github上的项目交互数据",
"fields": [
{
"filed_name": "ID",
"field_type": "int",
"desc": "编号"
},
{
"field_name": "RepoID",
"field_type": "int",
"desc": "开源项目GithubID"
},
{
"field_name": "Org",
"field_type": "str",
"desc": "开源项目组织"
},
{
"field_name": "FullName",
"field_type": "str",
"desc": "开源项目全名"
},
{
"field_name": "Url",
"field_type": "str",
"desc": "开源项目url"
},
{
"field_name": "Repo",
"field_type": "str",
"desc": "仓库名"
},
{
"field_name": "Star",
"desc": "开源项目的Github Star 总数",
"field_type": "str"
},
{
"field_name": "Fork",
"desc": "开源项目的Github Fork 总数",
"field_type": "str"
},
{
"field_name": "Contributors",
"desc": "开源项目的Github 贡献者 总数",
"field_type": "str"
},
{
"field_name": "OSC-URL",
"desc": "开源项目的Github CodeChina URL",
"field_type": "str"
},
{
"field_name": "Region",
"desc": "开源项目所属的区域",
"field_type": "str"
}
]
},
"repo_csdn_trends": {
"file": "../data/CSDN/repo-csdn-trends.xlsx",
"sheet_name": "Sheet1",
"desc": "开源项目在CSDN站内指数数据",
"fields": [
{
"field_name": "repo_name",
"field_type": "str",
"desc": "开源项目名称"
},
{
"field_name": "2020-10-01 00:00:00",
"field_type": "int",
"desc": "月份第一天时间"
}
]
},
"repo_commit_rank": {
"file": "../data/CSDN/repo-commit-rank.csv",
"desc": "开源项目在Github的月commit变化",
"fields": [
{
"field_name": "actor_email",
"field_type": "str",
"desc": "用户邮箱"
},
{
"field_name": "sum_total",
"field_type": "int",
"desc": "用户累计Github项目贡献数"
},
{
"feild_name": "any_repo_path",
"field_type": "str",
"desc": "用户贡献过的任意一个Github仓库路径"
},
{
"field_name": "any_commit_id",
"field_type": "str",
"desc": "用户在上述贡献过的Github仓库里的任意一个commit"
}
]
},
"repo_github_active_trends": {
"file": "../data/PingCAP/项目活跃度变化.csv",
"desc": "开源项目在Github上的月活跃度数据",
"fields": [
{
"filed_name": "event_month",
"field_type": "date",
"desc": "交互数据汇总月份"
},
{
"field_name": "repo_name",
"field_type": "str",
"desc": "开源项目名称"
},
{
"field_name": "push_count",
"field_type": "int",
"desc": "开源项目当月 git push 次数"
},
{
"field_name": "pr_count",
"field_type": "int",
"desc": "开源项目当月 git pr 次数"
},
{
"field_name": "issue_count",
"field_type": "int",
"desc": "开源项目当月 git issue 个数"
},
{
"field_name": "creator_count",
"field_type": "int",
"desc": "开源项目当月 git操作 创建者总数"
}
]
},
"repo_github_popular_trends": {
"file": "../data/PingCAP/项目受欢迎度变化.csv",
"desc": "开源项目在Github上的月收欢迎程度数据",
"fields": [
{
"filed_name": "event_month",
"field_type": "date",
"desc": "交互数据汇总月份"
},
{
"field_name": "repo_name",
"field_type": "str",
"desc": "开源项目名称"
},
{
"field_name": "push_count",
"field_type": "int",
"desc": "开源项目当月 git push 次数"
},
{
"field_name": "pr_count",
"field_type": "int",
"desc": "开源项目当月 git pr 次数"
},
{
"field_name": "issue_count",
"field_type": "int",
"desc": "开源项目当月 git issue 个数"
},
{
"field_name": "creator_count",
"field_type": "int",
"desc": "开源项目当月 git操作 创建者总数"
}
]
} }
}, })
}
def load_repo_github_user_info(config, ctx):
schema = config["schema"]
df = pd.read_excel(
schema["repo_github_user_info"]["file"],
sheet_name=schema["repo_github_user_info"]["sheet_name"])
df.fillna(value=0, inplace=True)
ctx["repo_github_user_info"] = df
def load_repo_github_info(config, ctx):
schema = config["schema"]
df = pd.read_excel(
schema["repo_github_info"]["file"],
sheet_name=schema["repo_github_info"]["sheet_name"])
df.fillna(value=0, inplace=True)
ctx["repo_github_info"] = df
def load_repo_csdn_trends(config, ctx):
schema = config["schema"]
df = pd.read_excel(
schema["repo_csdn_trends"]["file"],
sheet_name=schema["repo_csdn_trends"]["sheet_name"])
df.fillna(value=0, inplace=True)
ctx["repo_csdn_trends"] = df
def load_repo_commit_rank(config, ctx):
schema = config["schema"]
df = pd.read_csv(schema["repo_commit_rank"]["file"])
df.fillna(value=0, inplace=True)
ctx["repo_commit_rank"] = df
def load_repo_github_active_trends(config, ctx):
schema = config["schema"]
df = pd.read_csv(schema["repo_github_active_trends"]["file"])
df.fillna(value=0, inplace=True)
ctx["repo_github_active_trends"] = df
def load_repo_github_popular_trends(config, ctx):
schema = config["schema"]
df = pd.read_csv(schema["repo_github_popular_trends"]["file"])
df.fillna(value=0, inplace=True)
ctx["repo_github_popular_trends"] = df
def rank_repo_top_n(config, ctx):
repo_rank = []
repo_dict = {}
# 合并项目的总数据 repo_github_info 主键 FullName
df = ctx["repo_github_info"]
df.fillna(value=0)
for index, row in df.iterrows():
repo_item = {}
repo_key = row["FullName"].lower()
repo_item["region"] = row["Region"]
repo_item["star"] = int(row["Star"])
repo_item["fork"] = int(row["Fork"])
repo_item["contributors"] = int(row["Contributors"])
repo_dict[repo_key] = repo_item
repo_rank.append(repo_item)
# 合并项目的CSDN指数数据 repo_csdn_trends 主键 repo_name
df = ctx["repo_csdn_trends"]
for index, row in df.iterrows():
repo_name = row['repo_name']
repo_key = repo_name.lower()
repo_item = repo_dict.get(repo_key)
repo_item["csdn_index_month_avg"] = row[1:].mean()
# 合并项目的活跃变动数据 repo_github_active_trends 主键 repo_name
df = ctx["repo_github_active_trends"]
df.fillna(value=0)
df = df.groupby(["repo_name"]).agg(np.mean)
# print(df.loc['TheAlgorithms/Python'])
for index, row in df.iterrows():
repo_name = index
repo_key = repo_name.lower()
repo_item = repo_dict.get(repo_key)
repo_item["push_count_month_avg"] = row["push_count"]
repo_item["pr_count_month_avg"] = row["pr_count"]
repo_item["issue_count_month_avg"] = row["issue_count"]
repo_item["creator_count_month_avg"] = row["creator_count"]
# 合并项目的激活变动数据 repo_github_popular_trends 主键 repo_name
df = ctx["repo_github_popular_trends"]
df = df.groupby(["repo_name"]).agg(np.mean)
for index, row in df.iterrows():
repo_name = index
repo_key = repo_name.lower()
repo_item = repo_dict.get(repo_key)
repo_item["watch_count_month_avg"] = row["watch_count"]
repo_item["fork_count_month_avg"] = row["fork_count"]
# 合并表
df = pd.DataFrame.from_dict(repo_dict, orient='index')
df.fillna(value=0, inplace=True)
df.reset_index()
# 计算榜单得分
weights = {
"star": 1,
"fork": 1,
"contributors": 1,
"csdn_index_month_avg": 1,
"push_count_month_avg": 1,
"pr_count_month_avg": 1,
"issue_count_month_avg": 1,
"creator_count_month_avg": 1,
"watch_count_month_avg": 1,
"fork_count_month_avg": 1
}
total_weight_value = 0
for key in weights:
total_weight_value += weights[key]
for key in weights:
weights[key] = weights[key]/total_weight_value
df["score"] = 0
for key in weights:
df["score"] += df[key].apply(lambda x: x*weights[key])
print(df.head())
df = df.sort_values(by="score", ascending=False)
df.to_csv(config["ranks"]["repo"]["top_n"])
def rank_personal_top_n(config, ctx):
pass
def rank_company_top_n(config, ctx):
pass
def main(config):
ctx = {}
# print("@load_repo_github_user_info..")
# load_repo_github_user_info(config, ctx)
# print("@load_repo_commit_rank..")
# load_repo_commit_rank(config, ctx)
print("@load_repo_github_info..")
load_repo_github_info(config, ctx)
print("@load_repo_csdn_trends..")
load_repo_csdn_trends(config, ctx)
print("@load_repo_github_active_trends..")
load_repo_github_active_trends(config, ctx)
print("@load_repo_github_popular_trends..")
load_repo_github_popular_trends(config, ctx)
print("@rank_repo_top_n..")
rank_repo_top_n(config, ctx)
# print("@rank_personal_top_n..")
# rank_personal_top_n(config, ctx)
# print("@rank_company_top_n..")
# rank_company_top_n(config, ctx)
if __name__ == "__main__": if __name__ == "__main__":
main(config) [options, args] = parse_options()
if options.action is None:
show_help()
else:
actions = options.action.split('.')
if len(actions) == 0:
show_help()
else:
main(config, options, actions)
# -*- coding: utf-8 -*-
import os
import numpy as np
import pandas as pd
from config import config
def load_repo_github_user_info(config, ctx):
    """Read the Excel sheet configured for ``repo_github_user_info`` into ``ctx``.

    The workbook path and sheet name come from
    ``config["schema"]["repo_github_user_info"]``.  Missing cells are replaced
    with 0 and the frame is stored under ``ctx["repo_github_user_info"]``.
    """
    source = config["schema"]["repo_github_user_info"]
    frame = pd.read_excel(source["file"], sheet_name=source["sheet_name"])
    ctx["repo_github_user_info"] = frame.fillna(value=0)
def load_repo_github_info(config, ctx):
    """Read the Excel sheet configured for ``repo_github_info`` into ``ctx``.

    The workbook path and sheet name come from
    ``config["schema"]["repo_github_info"]``.  Missing cells are replaced
    with 0 and the frame is stored under ``ctx["repo_github_info"]``.
    """
    source = config["schema"]["repo_github_info"]
    frame = pd.read_excel(source["file"], sheet_name=source["sheet_name"])
    ctx["repo_github_info"] = frame.fillna(value=0)
def load_repo_csdn_trends(config, ctx):
    """Read the Excel sheet configured for ``repo_csdn_trends`` into ``ctx``.

    The workbook path and sheet name come from
    ``config["schema"]["repo_csdn_trends"]``.  Missing cells are replaced
    with 0 and the frame is stored under ``ctx["repo_csdn_trends"]``.
    """
    source = config["schema"]["repo_csdn_trends"]
    frame = pd.read_excel(source["file"], sheet_name=source["sheet_name"])
    ctx["repo_csdn_trends"] = frame.fillna(value=0)
def load_repo_commit_rank(config, ctx):
    """Read the CSV configured for ``repo_commit_rank`` into ``ctx``.

    The file path comes from ``config["schema"]["repo_commit_rank"]["file"]``.
    Missing cells are replaced with 0 and the frame is stored under
    ``ctx["repo_commit_rank"]``.
    """
    csv_path = config["schema"]["repo_commit_rank"]["file"]
    ctx["repo_commit_rank"] = pd.read_csv(csv_path).fillna(value=0)
def load_repo_github_active_trends(config, ctx):
    """Read the CSV configured for ``repo_github_active_trends`` into ``ctx``.

    The file path comes from
    ``config["schema"]["repo_github_active_trends"]["file"]``.  Missing cells
    are replaced with 0 and the frame is stored under
    ``ctx["repo_github_active_trends"]``.
    """
    csv_path = config["schema"]["repo_github_active_trends"]["file"]
    ctx["repo_github_active_trends"] = pd.read_csv(csv_path).fillna(value=0)
def load_repo_github_popular_trends(config, ctx):
    """Read the CSV configured for ``repo_github_popular_trends`` into ``ctx``.

    The file path comes from
    ``config["schema"]["repo_github_popular_trends"]["file"]``.  Missing cells
    are replaced with 0 and the frame is stored under
    ``ctx["repo_github_popular_trends"]``.
    """
    csv_path = config["schema"]["repo_github_popular_trends"]["file"]
    ctx["repo_github_popular_trends"] = pd.read_csv(csv_path).fillna(value=0)
def rank_repo_top_n(config, ctx):
    """Score every repository and write the ranking to a CSV file.

    Combines four frames already present in ``ctx``:

    * ``repo_github_info`` - one row per repository (``FullName``, ``Region``,
      ``Star``, ``Fork``, ``Contributors``); defines the set of ranked repos.
    * ``repo_csdn_trends`` - ``repo_name`` followed by value columns; every
      column after the first is averaged per row.
    * ``repo_github_active_trends`` - averaged per ``repo_name`` over
      ``push_count``, ``pr_count``, ``issue_count`` and ``creator_count``.
    * ``repo_github_popular_trends`` - averaged per ``repo_name`` over
      ``watch_count`` and ``fork_count``.

    Repositories are matched by lower-cased name.  Trend rows that refer to a
    repository missing from ``repo_github_info`` are skipped.  The score is
    the weighted sum of the metrics with weights normalised to sum to 1.

    Args:
        config: Project configuration; the output path is read from
            ``config["ranks"]["repo"]["top_n"]``.
        ctx: Dict holding the four frames listed above.

    Returns:
        None.  The table, sorted by descending score, is written as CSV.
    """
    repo_dict = {}

    # Base metrics from repo_github_info, keyed by lower-cased FullName.
    for _, row in ctx["repo_github_info"].iterrows():
        repo_dict[str(row["FullName"]).lower()] = {
            "region": row["Region"],
            "star": int(row["Star"]),
            "fork": int(row["Fork"]),
            "contributors": int(row["Contributors"]),
        }

    # CSDN index: average of every column after the leading repo_name column.
    for _, row in ctx["repo_csdn_trends"].iterrows():
        repo_item = repo_dict.get(str(row["repo_name"]).lower())
        if repo_item is None:
            continue  # trend data for a repository that is not being ranked
        repo_item["csdn_index_month_avg"] = row.iloc[1:].mean()

    # Activity and popularity trends: per-repository mean of the listed
    # columns.  Selecting the columns explicitly keeps non-numeric columns
    # such as the month label out of the aggregation.
    trend_sources = (
        ("repo_github_active_trends",
         ["push_count", "pr_count", "issue_count", "creator_count"]),
        ("repo_github_popular_trends",
         ["watch_count", "fork_count"]),
    )
    for ctx_key, columns in trend_sources:
        averages = ctx[ctx_key].groupby("repo_name")[columns].mean()
        for repo_name, row in averages.iterrows():
            repo_item = repo_dict.get(str(repo_name).lower())
            if repo_item is None:
                continue
            for column in columns:
                repo_item[f"{column}_month_avg"] = row[column]

    # Relative importance of each metric; normalised below.
    weights = {
        "star": 1,
        "fork": 1,
        "contributors": 1,
        "csdn_index_month_avg": 1,
        "push_count_month_avg": 1,
        "pr_count_month_avg": 1,
        "issue_count_month_avg": 1,
        "creator_count_month_avg": 1,
        "watch_count_month_avg": 1,
        "fork_count_month_avg": 1
    }

    # Merge into one table indexed by repository key.
    df = pd.DataFrame.from_dict(repo_dict, orient='index')
    for key in weights:
        # A metric that no repository received still needs a column so the
        # scoring loop below cannot fail on a missing key.
        if key not in df.columns:
            df[key] = 0
    df.fillna(value=0, inplace=True)

    # Weighted score.
    total_weight_value = sum(weights.values())
    df["score"] = 0.0
    for key, weight in weights.items():
        df["score"] += df[key] * (weight / total_weight_value)

    print(df.head())
    df = df.sort_values(by="score", ascending=False)
    df.to_csv(config["ranks"]["repo"]["top_n"])
def rank_personal_top_n(config, ctx):
    """Placeholder for the personal ranking; currently does nothing."""
def rank_company_top_n(config, ctx):
    """Placeholder for the company ranking; currently does nothing."""
def calc_ranks(config, options):
    """Load the input data sets and compute the repository ranking.

    Each step receives ``config`` and a shared ``ctx`` dict; its label is
    printed before it runs.  ``options`` is accepted but not used here.

    The user-info and commit-rank loaders and the personal/company rankings
    are not part of the pipeline at present.
    """
    ctx = {}
    pipeline = (
        ("@load_repo_github_info..", load_repo_github_info),
        ("@load_repo_csdn_trends..", load_repo_csdn_trends),
        ("@load_repo_github_active_trends..", load_repo_github_active_trends),
        ("@load_repo_github_popular_trends..", load_repo_github_popular_trends),
        ("@rank_repo_top_n..", rank_repo_top_n),
    )
    for label, step in pipeline:
        print(label)
        step(config, ctx)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册