# [Angle closure Glaucoma Evaluation Challenge](https://age.grand-challenge.org/Details/)
## Angle closure classification Baseline

## requirement install

In [None]:
!pip install xlrd

## Zip File Extract

Assume `Training100.zip` and `Validation_ASOCT_Image.zip` are stored @ `./AGE_challenge Baseline/datasets/`

In [None]:
!unzip -q ../datasets/Training100.zip -d ../datasets/
!unzip -q ../datasets/Validation_ASOCT_Image.zip -d ../datasets/

# Explore Data & Train/Val split

In [1]:
import numpy as np
import csv
import matplotlib.pyplot as plt
import cv2
import os, shutil
import pprint
import pandas as pd

%matplotlib inline

In [2]:
data_root_path = "../datasets/Training100/"
xlsx_file_path = os.path.join(data_root_path, "Training100_Location.xlsx")

# Load
image_path = os.path.join(data_root_path, "ASOCT_Image")
label_file_path = os.path.join(data_root_path, "train_cls.csv")

# Save
train_file_path = os.path.join(data_root_path, "cls_train_split.csv")
val_file_path = os.path.join(data_root_path, "cls_val_split.csv")

In [3]:
xlsx_file = pd.read_excel(xlsx_file_path)
xlsx_file.to_csv(label_file_path, 
                    index=False, columns=['ASOCT_Name', 'Left_Label', 'Right_Label'])
xlsx_file.head()

Unnamed: 0,ASOCT_Name,Left_Label,X1,Y1,Right_Label,X2,Y2
0,T0056-10.jpg,1,228.833656,466.959601,1,1870.803864,451.5923
1,T0047-06.jpg,1,207.935545,525.938764,1,1792.231404,432.521881
2,T0066-15.jpg,0,239.372633,476.273925,0,1899.775568,501.00741
3,T0025-15.jpg,0,177.708404,545.655935,0,1862.380363,439.228928
4,T0088-06.jpg,0,285.25617,735.076014,0,1884.122651,767.858589


In [4]:
data_list = []

with open(label_file_path,'r') as f:  
    lines=csv.reader(f)  
    for key, line in enumerate(lines):  
        data_list.append(line)
        
pprint.pprint(data_list[:5])

[['ASOCT_Name', 'Left_Label', 'Right_Label'],
 ['T0056-10.jpg', '1', '1'],
 ['T0047-06.jpg', '1', '1'],
 ['T0066-15.jpg', '0', '0'],
 ['T0025-15.jpg', '0', '0']]


In [5]:
#                             left, right
# negative sample (label==0): 1280, 1280
# positive sample (label==1): 320,  320
left_label_counter = [0, 0]
right_label_counter = [0, 0]

for line in data_list[1:]:
    file_name, l_label, r_label = line
    left_label_counter[int(l_label)] += 1
    right_label_counter[int(r_label)] += 1
    
print(left_label_counter)
print(right_label_counter)

[1280, 320]
[1280, 320]


In [6]:
# Left label == Right label, in **TRAINING SET**
for line in data_list[1:]:
    file_name, l_label, r_label = line
    if int(l_label) != int(r_label):
        print(line)

### Train/Val Split

In [7]:
def train_val_split(data_list, train_ratio=0.8, shuffle_seed=42):
    testee_list = list(set( [line[0].split("-")[0] for line in data_list[1:]] ))
    
    # Split by patient id, prevent data leakage
    val_testee_idx = np.random.choice(testee_list, int(len(testee_list) * (1-train_ratio)), replace=False)

    train_list = []
    val_list = []
    
    for line in data_list[1:]:
        file_name, _, _ = line
        if file_name.split("-")[0] in val_testee_idx:
            val_list.append(line)
        else:
            train_list.append(line)
            
    return train_list, val_list

In [8]:
train_data_list, val_data_list = train_val_split(data_list)
print(len(train_data_list))
print(len(val_data_list))

1296
304


In [9]:
# Write to files
with open(train_file_path, "w+") as f:
    for line in train_data_list:
#         file_name, l_label, r_label = line
        f.write("{},{},{}\n".format(*line))
    
with open(val_file_path, "w+") as f:
    for line in val_data_list:
        f.write("{},{},{}\n".format(*line))