# Patch overview (review preamble; text ahead of the first "diff --git" header
# is not part of any hunk).
#
# NOTE(review): the line structure of this patch has been collapsed -- hunk
# lines are joined by single spaces and the physical line breaks fall in the
# middle of statements. Blank context lines and indentation appear to be lost
# (e.g. the first hunk declares 8 lines per side but fewer are
# distinguishable), so it presumably cannot be applied as-is; regenerate it
# from the repository before use.
#
# Sections, in order:
#   1. githublink_process/add_group_project_name.py
#      Wraps path components [3] and [4] of project.split('/') in str()
#      before appending them to group_names / project_names.
#   2. githublink_process/mirror_automate.py
#      Drops .head(20) from the read_csv call, lowercases the group name when
#      building group_dir for the os.path.exists checks, and in the else
#      branch creates the directory / imports.yml path from the
#      non-lowercased name (group_dir_upper).
#      NOTE(review): the existence check uses the lowercased path while the
#      directory is created with the original case; for a group name that
#      contains uppercase letters the check would not find a directory made
#      by the else branch -- confirm this is intended.
#      NOTE(review): the context line "except yaml.YAMLError as exec" binds
#      the exception to the name of the builtin exec.
#   3. githublink_process/mirror_group_project_extract.py
#      Lowercases the group name and each project name before appending them
#      to group_list / project_list.
#   4. githublink_process/mirrors_needed.py
#      Lowercases the csv2 group/project values before the membership tests
#      against csv1['group_name'] and csv1['project_name'].
#      NOTE(review): the project test appears to look at every project name
#      in csv1, not only those of the matching group, and list(...) is
#      rebuilt on each iteration -- verify against the full script.
#   5. githublink_process/url_status.py (new file, 38 lines)
#      Reads ./mirrors_needed.csv, requests each 'project path' URL for
#      indices 0..1999 with a 0.5 s sleep after each request, collects the
#      indices whose status is not 200, sets those 'project path' entries to
#      None, drops rows containing any NaN and writes the result back to
#      mirrors_needed.csv.
#      NOTE(review): getHttpStatusCode catches only
#      requests.exceptions.HTTPError; per the requests documentation that is
#      raised by raise_for_status(), which is not called here, so connection
#      errors / timeouts would presumably reach the caller's
#      "except Exception", which prints and does NOT record the index
#      (unreachable URLs would be kept) -- confirm.
#      NOTE(review): requests.get is called without timeout=, so a stalled
#      server could block the loop -- confirm.
#      NOTE(review): "project_path[delete_index_list] = None" assigns through
#      a Series taken from csv; whether csv sees the change depends on pandas
#      copy semantics -- verify, or assign via csv.loc[...] instead.
diff --git a/githublink_process/add_group_project_name.py b/githublink_process/add_group_project_name.py index 327f0ccd79a80b63d8bc02db9cfce23b7d15add7..47a46ae4f04b557294153cf9c29315e8bd5d6046 100644 --- a/githublink_process/add_group_project_name.py +++ b/githublink_process/add_group_project_name.py @@ -8,8 +8,8 @@ project_path = csv['project path'] group_names = [] project_names = [] for index, project in enumerate(project_path): - group_names.append(project.split('/')[3]) - project_names.append(project.split('/')[4]) + group_names.append(str(project.split('/')[3])) + project_names.append(str(project.split('/')[4])) csv['group_name'] = group_names csv['project_name'] = project_names diff --git a/githublink_process/mirror_automate.py b/githublink_process/mirror_automate.py index 0cf6806db35e79e7d85780e9b61360e444c21e2b..1878cab1e85afe3c95ef5b04e765942c7addec16 100644 --- a/githublink_process/mirror_automate.py +++ b/githublink_process/mirror_automate.py @@ -3,11 +3,11 @@ import yaml import pandas as pd -csv = pd.read_csv('mirrors_needed.csv').head(20) +csv = pd.read_csv('mirrors_needed.csv') for index, group in enumerate(csv['group_name']): - group_dir = '/home/mao/Downloads/mirrors-settings/projects/' + str(group) + group_dir = '/home/mao/Downloads/mirrors-settings/projects/' + str(group).lower() if os.path.exists(group_dir) and os.path.exists(group_dir + '/imports.yml'): print(True) group_imports_yml = group_dir + '/imports.yml' @@ -31,8 +31,9 @@ for index, group in enumerate(csv['group_name']): except yaml.YAMLError as exec: print(exec) else: - os.makedirs(group_dir, exist_ok=True) - group_imports_yml = group_dir + '/imports.yml' + group_dir_upper = '/home/mao/Downloads/mirrors-settings/projects/' + str(group) + os.makedirs(group_dir_upper, exist_ok=True) + group_imports_yml = group_dir_upper + '/imports.yml' add_project_dict = {'group': {'name': str(group), 'description': '', 'projects': [{'name': str(csv['project_name'][index]), 'description': '', 
'mirrorRelease': False, 'topic': ''}, ]}} diff --git a/githublink_process/mirror_group_project_extract.py b/githublink_process/mirror_group_project_extract.py index cd2f27683dbba5d8b7649dec4171f29c39af3918..51e4af4e7f64f258e18a440cea67a7c3505ee37b 100644 --- a/githublink_process/mirror_group_project_extract.py +++ b/githublink_process/mirror_group_project_extract.py @@ -27,8 +27,8 @@ for i, group in enumerate(groups): group_name = parse_yml['group']['name'] for name in parse_yml['group']['projects']: # print(group_name, name['name']) - group_list.append(group_name) - project_list.append(name['name']) + group_list.append(group_name.lower()) + project_list.append(name['name'].lower()) mirrors = pd.DataFrame.from_dict({'group_name': group_list, 'project_name': project_list}) diff --git a/githublink_process/mirrors_needed.py b/githublink_process/mirrors_needed.py index 3bd58385a886a68b6f51d25b2618cbe12abb51f9..da0c172ede1528e94f708c900fc86f3ad979768d 100644 --- a/githublink_process/mirrors_needed.py +++ b/githublink_process/mirrors_needed.py @@ -9,10 +9,10 @@ add_project_path = [] for index, group_project in enumerate(zip(csv2['group_name'], csv2['project_name'])): # print(group_project[0], group_project[1]) - if group_project[0] in list(csv1['group_name']): + if str(group_project[0]).lower() in list(csv1['group_name']): # print(group_project[0]) # break - if group_project[1] in list(csv1['project_name']): + if str(group_project[1]).lower() in list(csv1['project_name']): pass else: add_group_list.append(group_project[0]) diff --git a/githublink_process/url_status.py b/githublink_process/url_status.py new file mode 100644 index 0000000000000000000000000000000000000000..70a5617a95e5a37f53266f1dc2d760fbfbcbc778 --- /dev/null +++ b/githublink_process/url_status.py @@ -0,0 +1,38 @@ +import time + +import requests +import pandas as pd + +filenameStr = './mirrors_needed.csv' +csv = pd.read_csv(filenameStr, encoding='ISO-8859-1', dtype=str, usecols=['group_name', 
'project_name', 'project path']) +print(len(csv)) +project_path = csv['project path'] + +cookies = {} +headers = {} +delete_index_list = [] + +def getHttpStatusCode(url): + try: + request = requests.get(url, headers=headers, cookies=cookies) + httpStatusCode = request.status_code + return httpStatusCode + except requests.exceptions.HTTPError as e: + return e + +if __name__ == "__main__": + for index, line in enumerate(project_path): + if index > 1999: + break + try: + status = getHttpStatusCode(line) + time.sleep(0.5) + if status != 200: + delete_index_list.append(index) + except Exception as e: + print(e) + print(delete_index_list) + project_path[delete_index_list] = None + drop_none = csv.dropna(how='any') + drop_none.to_csv('mirrors_needed.csv', index=False) + print(len(drop_none))