# gitee_tags_count.py — committed by MaoXianxin
import pandas as pd
from collections import Counter

# Default locations used when this file is run as a script.
INPUT_CSV_PATH = '/home/mao/Documents/GiTeeData.csv'
OUTPUT_CSV_PATH = 'gitee_tags.csv'


def split_tags(cells):
    """Flatten comma-separated tag cells into a list of individual tags.

    Args:
        cells: Iterable of strings, each holding one or more tags separated
            by ``,``.

    Returns:
        A list of tags in input order, each with surrounding whitespace
        stripped. Empty pieces (produced by doubled or trailing commas, or
        by whitespace-only cells) are skipped so that the empty string is
        never counted as a tag.
    """
    return [
        tag
        for cell in cells
        for tag in (piece.strip() for piece in cell.split(','))
        if tag
    ]


def count_tags(tags_list):
    """Count how often each tag occurs, most frequent first.

    Args:
        tags_list: Iterable of hashable tags.

    Returns:
        A dict mapping tag -> occurrence count, ordered by descending count.
        Tags with equal counts appear in reverse order of first appearance,
        because a stable ascending sort is reversed as a whole.
    """
    tags_count = Counter(tags_list)
    ascending = sorted(tags_count.items(), key=lambda item: item[1])
    return dict(reversed(ascending))


def main(input_path=INPUT_CSV_PATH, output_path=OUTPUT_CSV_PATH):
    """Read the ``tags`` column of a CSV and write per-tag counts to a CSV.

    Prints the number of rows read, drops rows whose ``tags`` value is
    missing, splits the remaining cells on commas, and writes a two-column
    file (``tags``, ``count``) without the index column.

    Args:
        input_path: Path of the CSV to read; it must contain a ``tags``
            column.
        output_path: Path of the CSV to write.
    """
    frame = pd.read_csv(
        input_path, encoding='utf-8', dtype=str, usecols=['tags']
    )
    print(len(frame))

    # Missing cells are NaN (not str) and would break ``split``; drop them.
    tags = frame['tags'].dropna()
    tags_dict = count_tags(split_tags(tags))

    pd.DataFrame(
        {'tags': list(tags_dict.keys()), 'count': list(tags_dict.values())}
    ).to_csv(output_path, index=False)


if __name__ == '__main__':
    main()