提交 392cce9d 编写于 作者: F feilong

init rank framework

上级 ad67972f
*.txt .DS_Store
\ No newline at end of file *.txt
~$*.xlsx
\ No newline at end of file
import imp
import os import os
import pandas as pd import pandas as pd
# Static configuration for the ranking pipeline.
#
# "ranks"  maps each ranking family (personal / company / repo) to the CSV
#          paths of its overall, international and China-only variants.
# "schema" describes every input data source: where the file lives, which
#          sheet to read (Excel sources only) and the columns it carries.
#
# Every field descriptor uses exactly the keys "field_name", "field_type"
# and "desc" so that consumers can iterate over them uniformly.
#
# NOTE(review): load_repo_commit_rank() reads schema["repo_commit_rank"],
# which is not defined here; add that entry once its file location is known.
config = {
    "ranks": {
        "personal": {
            "top_n": "../rank/个人向主要开源技术栈贡献榜单.csv",
            "top_n_en": "../rank/个人向国际主要开源技术栈贡献榜单.csv",
            "top_n_zh_cn": "../rank/个人向中国主要开源技术栈贡献榜单.csv",
        },
        "company": {
            "top_n": "../rank/公司向主要开源技术栈贡献榜单.csv",
            "top_n_en": "../rank/公司向国际主要开源技术栈贡献榜单.csv",
            "top_n_zh_cn": "../rank/公司向中国主要开源技术栈贡献榜单.csv",
        },
        "repo": {
            "top_n": "../rank/开源项目榜.csv",
            "top_n_en": "../rank/开源项目榜_非中国项目.csv",
            "top_n_zh_cn": "../rank/开源项目榜_中国项目.csv"
        }
    },
    "schema": {
        "repo_github_user_info": {
            "file": "../GitHub/Userinfo.xlsx",
            "sheet_name": "repo",
            "desc": "开源项目Github贡献者信息",
            "fields": [
                {
                    "field_name": "actor_email",
                    "field_type": "str",
                    "desc": "用户邮箱"
                },
                {
                    "field_name": "sum_total",
                    "field_type": "int",
                    "desc": "用户累计Github项目贡献数"
                },
                {
                    # Key was misspelled "feild_name".
                    "field_name": "any_repo_path",
                    "field_type": "str",
                    "desc": "用户贡献过的任意一个Github仓库路径"
                },
                {
                    "field_name": "any_commit_id",
                    "field_type": "str",
                    "desc": "用户在上述贡献过的Github仓库里的任意一个commit"
                },
                {
                    "field_name": "avatar_url",
                    "field_type": "str",
                    "desc": "用户头像"
                },
                {
                    "field_name": "name",
                    "field_type": "str",
                    "desc": "用户昵称"
                },
                {
                    # The description text had been placed in "field_type"
                    # and "desc" left empty; the two values are restored to
                    # their proper keys.
                    "field_name": "company",
                    "field_type": "str",
                    "desc": "用户在Github上填写的公司名称"
                },
                {
                    "field_name": "location",
                    "field_type": "str",
                    "desc": "用户在Github上填写的城市信息"
                },
                {
                    "field_name": "followers",
                    "field_type": "int",
                    "desc": "用户的被关注数"
                },
                {
                    "field_name": "author_id",
                    "field_type": "int",
                    "desc": "用户Github的ID"
                },
                {
                    "field_name": "type",
                    "field_type": "str",
                    "desc": "账号类型,人类或机器人"
                },
                {
                    "field_name": "login",
                    "field_type": "str",
                    "desc": "登录名"
                },
                {
                    "field_name": "created_at",
                    "field_type": "date",
                    "desc": "创建时间"
                },
                {
                    "field_name": "updated_at",
                    "field_type": "date",
                    "desc": "更新时间"
                }
            ]
        },
        "repo_github_info": {
            "file": "../Github-Repos.xlsx",
            "sheet_name": "汇总",
            "desc": "开源项目在Github上的项目交互数据",
            "fields": [
                {
                    # Key was misspelled "filed_name".
                    "field_name": "ID",
                    "field_type": "int",
                    "desc": "编号"
                },
                {
                    "field_name": "RepoID",
                    "field_type": "int",
                    "desc": "开源项目GithubID"
                },
                {
                    "field_name": "Org",
                    "field_type": "str",
                    "desc": "开源项目组织"
                },
                {
                    "field_name": "FullName",
                    "field_type": "str",
                    "desc": "开源项目全名"
                },
                {
                    "field_name": "Url",
                    "field_type": "str",
                    "desc": "开源项目url"
                },
                {
                    "field_name": "Repo",
                    "field_type": "str",
                    "desc": "仓库名"
                },
                # NOTE(review): Star / Fork / Contributors are described as
                # totals yet typed "str"; confirm whether "int" is intended.
                {
                    "field_name": "Star",
                    "desc": "开源项目的Github Star 总数",
                    "field_type": "str"
                },
                {
                    "field_name": "Fork",
                    "desc": "开源项目的Github Fork 总数",
                    "field_type": "str"
                },
                {
                    "field_name": "Contributors",
                    "desc": "开源项目的Github 贡献者 总数",
                    "field_type": "str"
                },
                {
                    "field_name": "OSC-URL",
                    "desc": "开源项目的Github CodeChina URL",
                    "field_type": "str"
                },
                {
                    "field_name": "Region",
                    "desc": "开源项目所属的区域",
                    "field_type": "str"
                }
            ]
        },
        "repo_csdn_trends": {
            "file": "../CSDN/repo-csdn-trends.xlsx",
            "sheet_name": "Sheet1",
            "desc": "开源项目在CSDN站内指数数据",
            "fields": [
                {
                    "field_name": "repo_name",
                    "field_type": "str",
                    "desc": "开源项目名称"
                },
                {
                    "field_name": "2020-10-01 00:00:00",
                    "field_type": "int",
                    "desc": "月份第一天时间"
                }
            ]
        },
        "repo_github_active_trends": {
            "file": "../PingCAP/项目活跃度变化.csv",
            "desc": "开源项目在Github上的月活跃度数据",
            "fields": [
                {
                    # Key was misspelled "filed_name".
                    "field_name": "event_month",
                    "field_type": "date",
                    "desc": "交互数据汇总月份"
                },
                {
                    "field_name": "repo_name",
                    "field_type": "str",
                    "desc": "开源项目名称"
                },
                {
                    "field_name": "push_count",
                    "field_type": "int",
                    "desc": "开源项目当月 git push 次数"
                },
                {
                    "field_name": "pr_count",
                    "field_type": "int",
                    "desc": "开源项目当月 git pr 次数"
                },
                {
                    "field_name": "issue_count",
                    "field_type": "int",
                    "desc": "开源项目当月 git issue 个数"
                },
                {
                    "field_name": "creator_count",
                    "field_type": "int",
                    "desc": "开源项目当月 git操作 创建者总数"
                }
            ]
        },
        # NOTE(review): the field list below is identical to
        # repo_github_active_trends; confirm it matches the actual columns
        # of the popularity CSV.
        "repo_github_popular_trends": {
            "file": "../PingCAP/项目受欢迎度变化.csv",
            "desc": "开源项目在Github上的月收欢迎程度数据",
            "fields": [
                {
                    # Key was misspelled "filed_name".
                    "field_name": "event_month",
                    "field_type": "date",
                    "desc": "交互数据汇总月份"
                },
                {
                    "field_name": "repo_name",
                    "field_type": "str",
                    "desc": "开源项目名称"
                },
                {
                    "field_name": "push_count",
                    "field_type": "int",
                    "desc": "开源项目当月 git push 次数"
                },
                {
                    "field_name": "pr_count",
                    "field_type": "int",
                    "desc": "开源项目当月 git pr 次数"
                },
                {
                    "field_name": "issue_count",
                    "field_type": "int",
                    "desc": "开源项目当月 git issue 个数"
                },
                {
                    "field_name": "creator_count",
                    "field_type": "int",
                    "desc": "开源项目当月 git操作 创建者总数"
                }
            ]
        }
    },
}
def load_repo_github_user_info(config, ctx):
    """Load the GitHub contributor workbook into ``ctx``.

    Reads the Excel file and sheet named by
    ``config["schema"]["repo_github_user_info"]``, prints the first rows
    of the resulting frame, and stores the frame under
    ``ctx["repo_github_user_info"]``.

    Args:
        config: Mapping with a ``"schema"`` entry describing data sources.
        ctx: Mutable mapping that receives the loaded frame.

    Raises:
        KeyError: If the schema entry or one of its keys is missing.
    """
    source = config["schema"]["repo_github_user_info"]
    frame = pd.read_excel(source["file"], sheet_name=source["sheet_name"])
    print(frame.head())
    ctx["repo_github_user_info"] = frame
def load_repo_github_info(config, ctx):
    """Load the GitHub repository workbook into ``ctx``.

    Reads the Excel file and sheet named by
    ``config["schema"]["repo_github_info"]``, prints the first rows of the
    resulting frame, and stores the frame under ``ctx["repo_github_info"]``.

    Args:
        config: Mapping with a ``"schema"`` entry describing data sources.
        ctx: Mutable mapping that receives the loaded frame.

    Raises:
        KeyError: If the schema entry or one of its keys is missing.
    """
    source = config["schema"]["repo_github_info"]
    frame = pd.read_excel(source["file"], sheet_name=source["sheet_name"])
    print(frame.head())
    ctx["repo_github_info"] = frame
def load_repo_csdn_trends(config, ctx):
    """Load the CSDN trend workbook into ``ctx``.

    Reads the Excel file and sheet named by
    ``config["schema"]["repo_csdn_trends"]``, prints the first rows of the
    resulting frame, and stores the frame under ``ctx["repo_csdn_trends"]``.

    Args:
        config: Mapping with a ``"schema"`` entry describing data sources.
        ctx: Mutable mapping that receives the loaded frame.

    Raises:
        KeyError: If the schema entry or one of its keys is missing.
    """
    entry = config["schema"]["repo_csdn_trends"]
    # The sheet name must come from this source's own schema entry; it was
    # previously taken from "repo_github_info", whose sheet belongs to a
    # different workbook.
    df = pd.read_excel(entry["file"], sheet_name=entry["sheet_name"])
    print(df.head())
    ctx["repo_csdn_trends"] = df
def load_repo_commit_rank(config, ctx):
    """Load the repository commit-rank CSV into ``ctx``.

    Reads the CSV named by ``config["schema"]["repo_commit_rank"]["file"]``,
    prints its column index, and stores the frame under
    ``ctx["repo_commit_rank"]``.

    Args:
        config: Mapping with a ``"schema"`` entry describing data sources.
        ctx: Mutable mapping that receives the loaded frame.

    Raises:
        KeyError: If the schema has no ``"repo_commit_rank"`` entry, or the
            entry has no ``"file"`` key.
    """
    schema = config["schema"]
    try:
        entry = schema["repo_commit_rank"]
    except KeyError:
        # Keep the exception type, but tell the operator what to add rather
        # than surfacing a bare key name.
        raise KeyError(
            'config["schema"] has no "repo_commit_rank" entry; '
            'add one with a "file" path before loading the commit rank'
        ) from None
    df = pd.read_csv(entry["file"])
    print(df.columns)
    ctx["repo_commit_rank"] = df
def load_repo_github_active_trends(config, ctx):
    """Load the GitHub monthly-activity CSV into ``ctx``.

    Reads the CSV named by
    ``config["schema"]["repo_github_active_trends"]["file"]``, prints its
    column index, and stores the frame under
    ``ctx["repo_github_active_trends"]``.

    Args:
        config: Mapping with a ``"schema"`` entry describing data sources.
        ctx: Mutable mapping that receives the loaded frame.

    Raises:
        KeyError: If the schema entry or its ``"file"`` key is missing.
    """
    csv_path = config["schema"]["repo_github_active_trends"]["file"]
    frame = pd.read_csv(csv_path)
    print(frame.columns)
    ctx["repo_github_active_trends"] = frame
def rank_personal_top_n(config, ctx):
    """Placeholder for the personal contributor ranking; currently a no-op.

    Args:
        config: Pipeline configuration (unused for now).
        ctx: Shared pipeline context (unused for now).
    """
    return None
def rank_company_top_n(config, ctx):
    """Placeholder for the company contributor ranking; currently a no-op.

    Args:
        config: Pipeline configuration (unused for now).
        ctx: Shared pipeline context (unused for now).
    """
    return None
def rank_repo_top_n(config, ctx):
    """Placeholder for the repository ranking; currently a no-op.

    Args:
        config: Pipeline configuration (unused for now).
        ctx: Shared pipeline context (unused for now).
    """
    return None
def main(config):
    """Run the pipeline: load every data source, then run each ranking step.

    A fresh context dict is created and handed, together with ``config``,
    to each step in order; loaders populate it and the ranking steps
    receive the same dict.

    Args:
        config: Mapping with the ``"schema"`` and ``"ranks"`` sections.
    """
    ctx = {}
    # Loaders come first so that the ranking steps see a populated context.
    steps = (
        load_repo_github_user_info,
        load_repo_github_info,
        load_repo_csdn_trends,
        load_repo_commit_rank,
        load_repo_github_active_trends,
        rank_personal_top_n,
        rank_company_top_n,
        rank_repo_top_n,
    )
    for step in steps:
        step(config, ctx)
if __name__ == "__main__":
    # Script entry point: run the pipeline with the module-level config.
    # main() requires the config argument, so the argument-less call left
    # over from the previous revision is dropped.
    main(config)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册