提交 85fe3925 编写于 作者: M MaoXianxin

Improve the code

上级 2b040944
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
"execution_count": 2, "execution_count": 2,
"outputs": [], "outputs": [],
"source": [ "source": [
"csv = pd.read_csv('111111.csv', usecols=['project full path', 'project url', 'notification count', 'star count'])" "csv = pd.read_csv('weekly_growth.csv', usecols=['project full path', 'project url', 'notification count', 'star count'])"
], ],
"metadata": { "metadata": {
"collapsed": false, "collapsed": false,
......
import pandas as pd
import matplotlib.pyplot as plt
import argparse
# Just change the parameters here
def parse_args(argv=None):
    """Parse the command-line options of this script.

    Args:
        argv: Argument strings to parse. ``None`` lets argparse read
            ``sys.argv[1:]``.

    Returns:
        Namespace with ``sort_values`` (the column whose values are counted),
        ``result_name`` (the CSV file to read) and ``use_cols`` (the list of
        columns loaded from that file).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--sort_values', default='notification count', type=str)
    parser.add_argument('--result_name', default='weekly_growth.csv', type=str)
    # Use nargs='+' rather than type=list: argparse applies `type` to each raw
    # string, so list() would split one column name into single characters.
    parser.add_argument(
        '--use_cols',
        default=['project full path', 'project url', 'notification count', 'star count'],
        type=str,
        nargs='+',
    )
    return parser.parse_args(argv)


def count_occurrences(frame, column):
    """Count how many rows carry each distinct value of ``column``.

    Args:
        frame: DataFrame holding the data.
        column: Name of the column to tally.

    Returns:
        Dict mapping ``str(value)`` to its number of rows, ordered by
        ascending value. Missing values (NaN) are not counted.
    """
    # A single value_counts pass replaces one boolean filter per unique value.
    tally = frame[column].value_counts(sort=False).sort_index()
    return {str(value): int(rows) for value, rows in tally.items()}


def filter_counts(counts, minimum=10):
    """Keep only the entries whose numeric key is at least ``minimum``.

    Args:
        counts: Dict as returned by ``count_occurrences`` (string keys).
        minimum: Smallest key value that is kept.

    Returns:
        New dict with the surviving entries in their original order.
    """
    # float() rather than int() so keys such as '12.0' (float columns) parse.
    return {key: rows for key, rows in counts.items() if float(key) >= minimum}


def plot_counts(counts):
    """Show ``counts`` first as a green bar chart, then as a line plot."""
    labels = list(counts)
    heights = list(counts.values())
    plt.bar(labels, heights, width=3, color='g')
    plt.show()
    plt.plot(labels, heights)
    plt.show()


def main():
    """Read the CSV, print the value distribution and plot its filtered part."""
    args = parse_args()
    csv = pd.read_csv(args.result_name, usecols=args.use_cols)
    dict_notification = count_occurrences(csv, args.sort_values)
    print(dict_notification)
    filter_dict = filter_counts(dict_notification)
    plot_counts(filter_dict)


if __name__ == '__main__':
    main()
\ No newline at end of file
...@@ -2,20 +2,22 @@ import pandas as pd ...@@ -2,20 +2,22 @@ import pandas as pd
import numpy as np import numpy as np
import argparse import argparse
# Just change the parameters here
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--csv1_path', default='0611-projects-star-and-notification.csv', type=str) parser.add_argument('--csv1_path', default='0611-projects-star-and-notification.csv', type=str)
parser.add_argument('--csv2_path', default='0618-projects-star-and-notification.csv', type=str) parser.add_argument('--csv2_path', default='0618-projects-star-and-notification.csv', type=str)
parser.add_argument('--sort_values', default='project full path', type=str) parser.add_argument('--sort_values', default='project full path', type=str)
parser.add_argument('--result_name', default='weekly_growth.csv', type=str) parser.add_argument('--result_name', default='weekly_growth.csv', type=str)
# Use nargs='+' rather than type=list: argparse applies `type` to each raw
# string, so list() would split one column name into single characters.
parser.add_argument('--calc_index', default=['notification count', 'star count'], type=str, nargs='+')
args = parser.parse_args() args = parser.parse_args()
csv1 = pd.read_csv(args.csv1_path) csv1 = pd.read_csv(args.csv1_path)
csv2 = pd.read_csv(args.csv2_path) csv2 = pd.read_csv(args.csv2_path)
csv1 = csv1.sort_values(args.sort_values) csv1 = csv1.sort_values(args.sort_values)
csv2 = csv2.sort_values(args.sort_values) csv2 = csv2.sort_values(args.sort_values)
projectNames1 = [] projectNames1 = []
for i in range(len(csv1)): for i in range(len(csv1)):
...@@ -24,32 +26,40 @@ for i in range(len(csv1)): ...@@ -24,32 +26,40 @@ for i in range(len(csv1)):
projectNames2 = [] projectNames2 = []
for i in range(len(csv2)): for i in range(len(csv2)):
projectNames2.append(csv2.iloc[i, 0]) projectNames2.append(csv2.iloc[i, 0])
in2notin1 = [] in2notin1 = []
for i in range(len(csv2)): for i in range(len(csv2)):
if not csv2.iloc[i, 0] in projectNames1: if not csv2.iloc[i, 0] in projectNames1:
in2notin1.append(csv2.iloc[i, 0]) in2notin1.append(csv2.iloc[i, 0])
for name in in2notin1: for name in in2notin1:
csv2.drop(csv2[csv2['project full path'] == name].index, inplace=True) csv2.drop(csv2[csv2['project full path'] == name].index, inplace=True)
in1notin2 = []
for i in range(len(csv1)):
if not csv1.iloc[i, 0] in projectNames2:
in1notin2.append(csv1.iloc[i, 0])
for name in in1notin2:
csv1.drop(csv1[csv1['project full path'] == name].index, inplace=True)
for i in range(len(csv1)): for i in range(len(csv1)):
if csv1.iloc[i, 0] == csv2.iloc[i, 0]: if csv1.iloc[i, 0] == csv2.iloc[i, 0]:
continue continue
print(i, csv1.iloc[i, 0], csv2.iloc[i, 0]) print(i, csv1.iloc[i, 0], csv2.iloc[i, 0])
csv1.index = np.asarray(list(range(0, len(csv1)))) csv1.index = np.asarray(list(range(0, len(csv1))))
csv2.index = np.asarray(list(range(0, len(csv1)))) csv2.index = np.asarray(list(range(0, len(csv1))))
notification_sub = csv2['notification count'] - csv1['notification count'] # One week data growth calculation
csv1['notification count'] = notification_sub calculation_list = args.calc_index
for calc_index in calculation_list:
sub = csv2[calc_index] - csv1[calc_index]
csv1[calc_index] = sub
star_sub = csv2['star count'] - csv1['star count']
csv1['star count'] = star_sub
csv1.to_csv(args.result_name, index=False) csv1.to_csv(args.result_name, index=False)
\ No newline at end of file
...@@ -2,11 +2,12 @@ import yaml ...@@ -2,11 +2,12 @@ import yaml
import pandas as pd import pandas as pd
import argparse import argparse
# Just change the parameters here
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--index_yml', default='/home/mao/Downloads/operation-work/explore/index.yml', type=str) parser.add_argument('--index_yml', default='/home/mao/Downloads/operation-work/explore/index.yml', type=str)
parser.add_argument('--sort_values', default='notification count', type=str) parser.add_argument('--sort_values', default='notification count', type=str)
parser.add_argument('--result_name', default='weekly_growth.csv', type=str) parser.add_argument('--result_name', default='weekly_growth.csv', type=str)
# Use nargs='+' rather than type=list: argparse applies `type` to each raw
# string, so list() would split one column name into single characters.
parser.add_argument('--use_cols', default=['project full path', 'project url', 'notification count', 'star count'], type=str, nargs='+')
args = parser.parse_args() args = parser.parse_args()
with open(args.index_yml, 'r', encoding='utf-8') as stream: with open(args.index_yml, 'r', encoding='utf-8') as stream:
...@@ -15,32 +16,14 @@ with open(args.index_yml, 'r', encoding='utf-8') as stream: ...@@ -15,32 +16,14 @@ with open(args.index_yml, 'r', encoding='utf-8') as stream:
print(parse_yml) print(parse_yml)
except yaml.YAMLError as exec: except yaml.YAMLError as exec:
print(exec) print(exec)
parse_yml['projects']
csv = pd.read_csv(args.result_name, usecols=['project full path', 'project url', 'notification count', 'star count'])
csv.head()
csv = pd.read_csv(args.result_name, usecols=args.use_cols)
csv = csv.sort_values(args.sort_values, ascending=False) csv = csv.sort_values(args.sort_values, ascending=False)
csv.head()
csv.iloc[0, 0]
for i in range(len(parse_yml['projects'])): for i in range(len(parse_yml['projects'])):
parse_yml['projects'][i] = csv.iloc[i+25, 0] parse_yml['projects'][i] = csv.iloc[i, 0]
parse_yml['projects']
with open(args.index_yml, 'w', encoding='utf-8') as stream: with open(args.index_yml, 'w', encoding='utf-8') as stream:
try: try:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册