提交 d8d64f00 编写于 作者: M MaoXianxin

gitee count and fix code

上级 5a6ec2da
import pandas as pd
from collections import Counter
# Location of the exported Gitee data; only its 'tags' column is read.
filenameStr = '/home/mao/Documents/GiTeeData.csv'


def count_tags(tag_strings):
    """Count how often each tag occurs across comma-separated tag strings.

    Args:
        tag_strings: Iterable of strings, each holding one tag or several
            tags separated by commas. Surrounding whitespace is stripped
            from every tag.

    Returns:
        A dict mapping tag -> number of occurrences, ordered from the most
        frequent tag to the least frequent one.
    """
    # str.split(',') returns a one-element list when there is no comma, so
    # single-tag and multi-tag strings go through the same code path.
    tags_list = [
        tag.strip()
        for string in tag_strings
        for tag in string.split(',')
    ]
    tags_count = Counter(tags_list)
    # Stable ascending sort followed by a reversal: highest counts first,
    # tags with equal counts in reverse first-seen order.
    ranked = sorted(tags_count.items(), key=lambda item: item[1])
    ranked.reverse()
    return dict(ranked)


def main():
    """Read the tags column, count every tag and write gitee_tags.csv."""
    frame = pd.read_csv(filenameStr, encoding='utf-8', dtype=str, usecols=['tags'])
    print(len(frame))
    # Rows without any tags are NaN after loading and cannot be split.
    tags = frame['tags'].dropna()
    tags_dict = count_tags(tags)
    pd.DataFrame.from_dict(
        {'tags': list(tags_dict.keys()), 'count': list(tags_dict.values())}
    ).to_csv('gitee_tags.csv', index=False)


if __name__ == '__main__':
    main()
\ No newline at end of file
......@@ -14,5 +14,6 @@ for index, project in enumerate(project_path):
# Attach group_names and project_names (both defined outside this excerpt)
# to the frame as the 'group_name' and 'project_name' columns.
csv['group_name'] = group_names
csv['project_name'] = project_names
# Write the four listed columns to test.csv without the index column, then
# print the number of rows written.
# NOTE(review): this looks like the pre-change side of a diff hunk; the
# dropna-based lines further down appear to replace it - confirm.
csv.to_csv('test.csv', columns=['project path', 'visits', 'group_name', 'project_name'], index=False)
print(len(csv))
\ No newline at end of file
# Discard every row that has a missing value in any column, so that both
# test.csv and the printed row count cover complete rows only.
drop_none = csv.dropna(how='any')
drop_none.to_csv('test.csv', columns=['project path', 'visits', 'group_name', 'project_name'], index=False)
print(len(drop_none))
\ No newline at end of file
......@@ -3,25 +3,26 @@ import pandas as pd
# Load test.csv and cut it off at a visits-based threshold, writing the kept
# rows to csv_split.csv.
# NOTE(review): this excerpt appears to interleave the pre-change and
# post-change lines of a diff (indentation and +/- markers are missing), so
# it does not run as written - confirm against the real file.
csv = pd.read_csv('test.csv')
print(len(csv))
# Visit counts as a float NumPy array.
visists = csv['visits'].to_numpy(dtype=float)
# visists = csv['visits'].to_numpy(dtype=float)
#
# sum = 0
# for index, visit in enumerate(visists):
# sum += visit
# if sum > visists.sum() * 0.6:
# print(index)
# break
#
# csv_split = csv[:index+1]
# csv_split.to_csv('csv_split.csv', columns=['project path', 'group_name', 'project_name', 'visits'], index=False)
# print(len(csv_split))
# Cumulative variant: add up visits row by row until the running total
# exceeds 60% of the overall total (visists.sum() is re-evaluated on every
# iteration). NOTE: `sum` rebinds the builtin of the same name.
sum = 0
for index, visit in enumerate(visists):
sum += visit
if sum > visists.sum() * 0.6:
# Threshold variant: look for the first row whose visit count is below 60.
visists = csv['visits'].to_numpy(dtype=float)
for index, num in enumerate(visists):
if num < 60:
print(index)
break
# csv[:index+1] keeps the row at `index` as well; csv[:index] stops just
# before it. The second assignment overwrites the first.
csv_split = csv[:index+1]
csv_split = csv[:index]
# Persist the kept rows (four listed columns, no index) and print their count.
csv_split.to_csv('csv_split.csv', columns=['project path', 'group_name', 'project_name', 'visits'], index=False)
print(len(csv_split))
# visists = csv['visits'].to_numpy(dtype=float)
# for index, num in enumerate(visists):
# if num < 60:
# print(index)
# break
#
# csv_split = csv[:index]
# csv_split.to_csv('csv_split.csv', columns=['project path', 'group_name', 'project_name', 'visits'], index=False)
\ No newline at end of file
# Print how many rows csv_split holds.
print(len(csv_split))
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册