{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# [Angle closure Glaucoma Evaluation Challenge](https://age.grand-challenge.org/Details/)\n", "## Angle closure classification Baseline" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## requirement install" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pip install xlrd" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Zip File Extract\n", "\n", "Assume `Training100.zip` and `Validation_ASOCT_Image.zip` are stored @ `./AGE_challenge Baseline/datasets/`" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!unzip -q ../datasets/Training100.zip -d ../datasets/\n", "!unzip -q ../datasets/Validation_ASOCT_Image.zip -d ../datasets/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Explore Data & Train/Val split" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import csv\n", "import matplotlib.pyplot as plt\n", "import cv2\n", "import os, shutil\n", "import pprint\n", "import pandas as pd\n", "\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "data_root_path = \"../datasets/Training100/\"\n", "xlsx_file_path = os.path.join(data_root_path, \"Training100_Location.xlsx\")\n", "\n", "# Load\n", "image_path = os.path.join(data_root_path, \"ASOCT_Image\")\n", "label_file_path = os.path.join(data_root_path, \"train_cls.csv\")\n", "\n", "# Save\n", "train_file_path = os.path.join(data_root_path, \"cls_train_split.csv\")\n", "val_file_path = os.path.join(data_root_path, \"cls_val_split.csv\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ASOCT_NameLeft_LabelX1Y1Right_LabelX2Y2
0T0056-10.jpg1228.833656466.95960111870.803864451.592300
1T0047-06.jpg1207.935545525.93876411792.231404432.521881
2T0066-15.jpg0239.372633476.27392501899.775568501.007410
3T0025-15.jpg0177.708404545.65593501862.380363439.228928
4T0088-06.jpg0285.256170735.07601401884.122651767.858589
\n", "
" ], "text/plain": [ " ASOCT_Name Left_Label X1 Y1 Right_Label X2 \\\n", "0 T0056-10.jpg 1 228.833656 466.959601 1 1870.803864 \n", "1 T0047-06.jpg 1 207.935545 525.938764 1 1792.231404 \n", "2 T0066-15.jpg 0 239.372633 476.273925 0 1899.775568 \n", "3 T0025-15.jpg 0 177.708404 545.655935 0 1862.380363 \n", "4 T0088-06.jpg 0 285.256170 735.076014 0 1884.122651 \n", "\n", " Y2 \n", "0 451.592300 \n", "1 432.521881 \n", "2 501.007410 \n", "3 439.228928 \n", "4 767.858589 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "xlsx_file = pd.read_excel(xlsx_file_path)\n", "xlsx_file.to_csv(label_file_path, \n", " index=False, columns=['ASOCT_Name', 'Left_Label', 'Right_Label'])\n", "xlsx_file.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[['ASOCT_Name', 'Left_Label', 'Right_Label'],\n", " ['T0056-10.jpg', '1', '1'],\n", " ['T0047-06.jpg', '1', '1'],\n", " ['T0066-15.jpg', '0', '0'],\n", " ['T0025-15.jpg', '0', '0']]\n" ] } ], "source": [ "data_list = []\n", "\n", "with open(label_file_path,'r') as f: \n", " lines=csv.reader(f) \n", " for key, line in enumerate(lines): \n", " data_list.append(line)\n", " \n", "pprint.pprint(data_list[:5])" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1280, 320]\n", "[1280, 320]\n" ] } ], "source": [ "# left, right\n", "# negative sample (label==0): 1280, 1280\n", "# positive sample (label==1): 320, 320\n", "left_label_counter = [0, 0]\n", "right_label_counter = [0, 0]\n", "\n", "for line in data_list[1:]:\n", " file_name, l_label, r_label = line\n", " left_label_counter[int(l_label)] += 1\n", " right_label_counter[int(r_label)] += 1\n", " \n", "print(left_label_counter)\n", "print(right_label_counter)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Left label == Right label, in **TRAINING SET**\n", "for line in data_list[1:]:\n", " file_name, l_label, r_label = line\n", " if int(l_label) != int(r_label):\n", " print(line)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Train/Val Split" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "def train_val_split(data_list, train_ratio=0.8, shuffle_seed=42):\n", " testee_list = list(set( [line[0].split(\"-\")[0] for line in data_list[1:]] ))\n", " \n", " # Split by patient id, prevent data leakage\n", " val_testee_idx = np.random.choice(testee_list, int(len(testee_list) * (1-train_ratio)), replace=False)\n", "\n", " train_list = []\n", " val_list = []\n", " \n", " for line in data_list[1:]:\n", " file_name, _, _ = line\n", " if file_name.split(\"-\")[0] in val_testee_idx:\n", " val_list.append(line)\n", " else:\n", " train_list.append(line)\n", " \n", " return train_list, val_list" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1296\n", "304\n" ] } ], "source": [ "train_data_list, val_data_list = train_val_split(data_list)\n", "print(len(train_data_list))\n", "print(len(val_data_list))" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# Write to files\n", "with open(train_file_path, \"w+\") as f:\n", " for line in train_data_list:\n", "# file_name, l_label, r_label = line\n", " f.write(\"{},{},{}\\n\".format(*line))\n", " \n", "with open(val_file_path, \"w+\") as f:\n", " for line in val_data_list:\n", " f.write(\"{},{},{}\\n\".format(*line))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 1 }