# [Angle closure Glaucoma Evaluation Challenge](https://age.grand-challenge.org/Details/)
## Scleral spur localization baseline (FCN)

- To keep model training stable, images whose coordinate is -1 (no valid label) were removed.

- For real inference, you may want to keep all images in the `val_file_path` file.

## Training

- Assume `Training100.zip` and `Validation_ASOCT_Image.zip` are stored @ `./AGE_challenge Baseline/datasets/`
- Assume `weights` are stored @ `./AGE_challenge Baseline/weights/`

### Download ImageNet weights

In [1]:
# https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/image_classification
# Start from a clean slate; -f keeps the first run quiet when nothing exists yet.
!rm -f ../weights/ResNet34_pretrained.tar
!rm -rf ../weights/ResNet34_pretrained

!wget -P ../weights/ https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar 
!tar xvf ../weights/ResNet34_pretrained.tar -C ../weights/ > /dev/null # silent
# Drop the fully-connected (fc*) weights: the FCN variant has no fc layer to load them into.
!rm -f ../weights/ResNet34_pretrained/fc*

--2019-08-06 14:16:30--  https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar
Resolving paddle-imagenet-models-name.bj.bcebos.com (paddle-imagenet-models-name.bj.bcebos.com)... 111.206.47.194, 202.106.5.21
Connecting to paddle-imagenet-models-name.bj.bcebos.com (paddle-imagenet-models-name.bj.bcebos.com)|111.206.47.194|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 87470080 (83M) [application/x-tar]
Saving to: ‘../weights/ResNet34_pretrained.tar’


2019-08-06 14:17:14 (1.93 MB/s) - ‘../weights/ResNet34_pretrained.tar’ saved [87470080/87470080]



### Main Code

In [2]:
import os, random, functools, math
import cv2
import numpy as np
import time

In [3]:
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as FL
import paddle.fluid.optimizer as FO
fluid.install_check.run_check()

Running Verify Fluid Program ... 
Your Paddle Fluid works well on SINGLE GPU or CPU.
Your Paddle Fluid works well on MUTIPLE GPU or CPU.
Your Paddle Fluid is installed successfully! Let's start deep Learning with Paddle Fluid now


In [4]:
# Dataset locations.
# The split CSV files are assumed to be produced beforehand by a preprocessing
# step (not shown here) that extracts the left/right label columns from
# Training100_Location.xlsx.
data_root_path = "../datasets/Training100/"
image_path = os.path.join(data_root_path, "ASOCT_Image_loc")

# One CSV per split; each is read line by line further below.
train_file_path = os.path.join(data_root_path, "loc_train_split.csv")
val_file_path = os.path.join(data_root_path, "loc_val_split.csv")

In [5]:
BATCH_SIZE = 32  # samples per batch handed to data_loader()
THREAD = 8  # worker count passed to paddle.reader.xmap_readers
BUF_SIZE = 32  # buffer size passed to paddle.reader.xmap_readers

In [6]:
# Remove last global pooling and fullyconnect layer to enable FCN arch.
# Standard ResNet Implement: 
# https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/image_classification/models/resnet.py
from resnet_modified import *

### Define Data Loader

In [7]:
def vflip_image(image):
    """Return `image` mirrored left-to-right.

    Despite the "vflip" name, flipCode=1 flips around the vertical axis,
    i.e. it swaps columns, so x coordinates of labels must be mirrored too.
    """
    mirrored = cv2.flip(image, 1)
    return mirrored

def gaussian_k(x0,y0, sigma, width, height):
    """Build a 2D Gaussian bump of shape (height, width).

    The peak (value 1.0) sits at column `x0`, row `y0`; `sigma` is the
    standard deviation in pixels.
    """
    cols = np.arange(0, width, 1, float)                   # (width,)
    rows = np.arange(0, height, 1, float).reshape(-1, 1)   # (height, 1)
    # Broadcasting (width,) against (height, 1) yields the full grid.
    squared_dist = (cols - x0) ** 2 + (rows - y0) ** 2
    return np.exp(-squared_dist / (2 * sigma ** 2))

def generate_hm(height, width, point, s=10):
    """Generate a heat map with a single Gaussian peak at a landmark.

    Args:
        height    : The height of Heat Map (the height of target output)
        width     : The width  of Heat Map (the width of target output)
        point     : (x, y) landmark position, x along columns, y along rows
        s         : standard deviation of the Gaussian, in pixels

    Returns:
        Array of shape (height, width) whose maximum is at `point`.
    """
    # gaussian_k expects (..., width, height); passing them in the other
    # order transposes the map whenever height != width.
    return gaussian_k(point[0], point[1], s, width, height)

In [8]:
def reader(img_path, file_list, batch_size=32, shuffle=True, shuffle_seed=42):
    """Create a batch generator factory over "file_name,x,y" entries.

    Args:
        img_path: unused here; kept for interface compatibility.
        file_list: sequence of strings formatted as "file_name,x,y".
        batch_size: number of samples per yielded batch.
        shuffle: when true, entries are visited in a fresh random order on
            every call of the returned function.
        shuffle_seed: seed of the private random generator used for
            shuffling; pass None to seed it from OS entropy.

    Returns:
        A zero-argument function; each call returns a generator yielding
        lists of [file_name, x, y] (the last batch may be shorter).
    """
    # Created once so successive epochs get different, yet reproducible,
    # permutations.
    rng = np.random.RandomState(shuffle_seed)

    def read_file_list():
        # Work on a copy: the caller's list must not be reordered.
        entries = list(file_list)
        if shuffle:
            rng.shuffle(entries)

        batch_data = []
        for line in entries:
            # Split from the right so commas inside the path are preserved.
            file_name, p_x, p_y = line.rsplit(",", 2)
            batch_data.append([file_name, float(p_x), float(p_y)])
            if len(batch_data) == batch_size:
                yield batch_data
                batch_data = []
        if batch_data:
            yield batch_data
    return read_file_list

def process_batch_data(input_data, mode, rotate=True, flip=True):
    """Load and preprocess one batch of (file, x, y) samples.

    Each image is read, converted BGR -> RGB, scaled to [0, 1], resized to
    256x256 and paired with a 256x256 Gaussian heat map centered on the
    rescaled landmark. In 'train' mode small Gaussian noise is added and the
    image is mirrored left-right with probability 0.5.

    Args:
        input_data: iterable of (file, p_x, p_y) samples.
        mode: 'train' enables augmentation; any other value disables it.
        rotate: accepted but currently unused.
        flip: allow the random left-right mirror in 'train' mode.

    Returns:
        List of (img, hm) tuples; img is channel-first (3, 256, 256).

    Raises:
        IOError: if an image file cannot be read.
    """
    target_size = 256
    batch_data = []
    for sample in input_data:
        image_file, p_x, p_y = sample

        img = cv2.imread(image_file)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError("Failed to read image: %s" % image_file)
        img = img[:, :, ::-1].astype('float32') / 255.0

        # The resize below is not aspect-preserving, so each axis needs its
        # own scale factor (identical for square inputs).
        ratio_x = float(target_size) / img.shape[1]
        ratio_y = float(target_size) / img.shape[0]
        p_x, p_y = p_x * ratio_x, p_y * ratio_y
        img = cv2.resize(img, (target_size, target_size))

        if mode == 'train':
            img = img + np.random.randn(*img.shape) * 0.3 / 255
            if flip and np.random.randint(0, 2):
                img = vflip_image(img)
                # NOTE(review): an index-based mirror would be 255 - p_x;
                # confirm the label coordinate convention before changing.
                p_x = target_size - p_x

        hm = generate_hm(target_size, target_size, (p_x, p_y))
        img = img.transpose((2, 0, 1))
        batch_data.append((img, hm))

    return batch_data

In [9]:
def data_loader(img_list, img_path, batch_size, order=False, mode='train'):
    """Assemble the batch pipeline: list reader -> shuffle -> parallel decode.

    Batches produced by reader() pass through a shuffle buffer of 32 and are
    then mapped through process_batch_data by THREAD workers with a buffer
    of BUF_SIZE. `order` is forwarded to xmap_readers, `mode` to the mapper.
    """
    batch_source = paddle.reader.shuffle(
        reader(img_path, img_list, batch_size), 32)
    decode_batch = functools.partial(process_batch_data, mode=mode)

    return paddle.reader.xmap_readers(
        decode_batch, batch_source, THREAD, BUF_SIZE, order=order)

In [10]:
# Each entry becomes "<image_path>/<csv line>". Blank lines are skipped
# because they would otherwise turn into entries without coordinates and
# fail later when the line is split into file name, x and y.
with open(train_file_path) as flist:
    train_file_list = [os.path.join(image_path, line.strip())
                       for line in flist if line.strip()]

with open(val_file_path) as flist:
    val_file_list = [os.path.join(image_path, line.strip())
                     for line in flist if line.strip()]

In [11]:
# Sanity check: entry counts of both splits and the format of one entry.
print(len(train_file_list))
print(len(val_file_list))
print(train_file_list[0])

2586
607
../datasets/Training100/ASOCT_Image_loc/T0056-10_left.jpg,228.83365553922314,466.95960107867666


In [12]:
# Shuffle the training entries in place once, before the loaders are built.
np.random.shuffle(train_file_list)

In [13]:
# Training batches may arrive out of order; validation asks xmap_readers to
# preserve order.
train_dataloader = data_loader(
    train_file_list, image_path, BATCH_SIZE, order=False, mode='train')
val_dataloader = data_loader(
    val_file_list, image_path, BATCH_SIZE, order=True, mode='val')

### Define model (compute graph)

In [14]:
def network():
    """Build the heat-map regression graph in the current default program.

    Returns:
        [loss, pred_hm, reader]: the mean squared error between predicted and
        target heat maps, the predicted heat map variable, and the PyReader
        that feeds the 'pixel' and 'label' inputs.
    """
    data_shape = [3, 256, 256]
    
    model = ResNet34()
    
    # Inputs: the image named 'pixel' and its target heat map named 'label'
    # (the heat map drops the channel dimension of data_shape).
    input_feature = FL.data(name='pixel', shape=data_shape, dtype='float32')
    hm = FL.data(name='label', shape=data_shape[1:], dtype='float32')
    
    logit = model.net(input_feature, class_dim=1)
    # Upsample the backbone output to 256x256 with one transposed convolution,
    # then drop the single-channel axis.
    pred_hm = FL.squeeze(
        FL.conv2d_transpose(logit, num_filters=1, output_size=256), axes=[1]) # Bs, 256,256
    
    reader = fluid.io.PyReader(feed_list=[input_feature, hm], 
                         capacity=64, iterable=True, use_double_buffer=True)

    # Per-pixel squared error, averaged into a single loss value.
    cost = FL.square_error_cost(pred_hm, hm)
    loss = FL.mean(cost)
    
    return [loss, pred_hm, reader]

In [15]:
def calc_dist(pred_hm, hm):
    """Mean Euclidean distance (in pixels) between heat-map peaks.

    For every (prediction, target) pair the arg-max positions are compared;
    the summed distances are divided by the number of target heat maps.
    """
    hm = np.array(hm)

    def _peak(heat_map):
        # Position of the maximum as (row, col) of the 2D map.
        return np.unravel_index(heat_map.argmax(), heat_map.shape)

    total_dist = 0.
    for target_map, predicted_map in zip(hm, pred_hm):
        target_row, target_col = _peak(target_map)
        pred_row, pred_col = _peak(predicted_map)
        total_dist += np.sqrt(
            (pred_row - target_row) ** 2 + (pred_col - target_col) ** 2)

    return total_dist / hm.shape[0]

In [16]:
def train(use_cuda, params_dirname_prefix, pretrained_model=False, EPOCH_NUM=10):
    """Train the localization network and validate after every epoch.

    Args:
        use_cuda: run on fluid.CUDAPlace(0) when true, otherwise on CPU.
        params_dirname_prefix: path prefix for saved parameters; "_best/" and
            "_checkpoint/" are appended to it.
        pretrained_model: directory holding pretrained variables to load, or a
            false value to skip loading.
        EPOCH_NUM: number of passes over the training data.

    Raises:
        ValueError: if the validation reader yields no batches.
    """
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    val_prog = fluid.Program()

    with fluid.program_guard(train_prog, startup_prog):
        # fluid.unique_name.guard() to share parameters with test network
        with fluid.unique_name.guard():
            train_loss, train_output, train_reader = network()

            optimizer = fluid.optimizer.Adam(learning_rate=1e-4)
            optimizer.minimize(train_loss)

    # Define the prediction (validation) network.
    with fluid.program_guard(val_prog, startup_prog):
        # Use fluid.unique_name.guard() to share parameters with train network
        with fluid.unique_name.guard():
            val_loss, val_output, val_reader = network()

    val_prog = val_prog.clone(for_test=True)

    # NOTE(review): presumably keeps the fetched variables alive across
    # runs -- confirm against the Paddle version in use.
    train_loss.persistable = True
    val_loss.persistable = True
    val_output.persistable = True

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    if pretrained_model:
        def if_exist(var):
            # Only load variables that have a file in the pretrained dir.
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(
            exe, pretrained_model, main_program=train_prog, predicate=if_exist)

    train_reader.decorate_sample_list_generator(train_dataloader, places=place)
    val_reader.decorate_sample_list_generator(val_dataloader, places=place)

    def save_params(suffix):
        """Save train_prog persistables under params_dirname_prefix + suffix."""
        target_dir = params_dirname_prefix + suffix
        if not os.path.isdir(target_dir):
            os.makedirs(target_dir)
        fluid.io.save_persistables(exe, target_dir, main_program=train_prog)

    # For training test cost
    def train_test(val_prog, val_reader):
        """Run one validation pass; return ([mean loss], mean peak distance)."""
        loss_sum = 0.
        batch_count = 0

        prediction = []
        label_values = []

        for val_data in val_reader():
            loss_np, pred_np = exe.run(
                program=val_prog,
                feed=val_data,
                fetch_list=[val_loss, val_output],
                use_program_cache=True)
            # Only the scalar loss is averaged; predictions are kept whole
            # for the distance metric below.
            loss_sum += loss_np[0]
            prediction.append(pred_np)
            label_values.append(np.array(val_data[0]['label']))
            batch_count += 1

        if batch_count == 0:
            raise ValueError("Validation reader yielded no batches")

        prediction = np.concatenate(prediction, 0)
        label_values = np.concatenate(label_values, 0)

        mean_dis = calc_dist(prediction, label_values)

        return [loss_sum / batch_count], mean_dis

    # main train loop.
    def train_loop():
        best_dist = 65536.

        for pass_id in range(EPOCH_NUM):
            data_load_time = time.time()
            for step_id, data_train in enumerate(train_reader()):
                # Time spent waiting for the reader to deliver this batch.
                data_load_costtime = time.time() - data_load_time
                start_time = time.time()
                avg_loss_value = exe.run(
                    train_prog,
                    feed=data_train,
                    fetch_list=[train_loss, train_output],
                    use_program_cache=True)
                cost_time = time.time() - start_time
                if step_id % 50 == 0:
                    mean_dis = calc_dist(avg_loss_value[1], data_train[0]['label'])
                    # Labels now match the values: epoch first, then step.
                    print("Epoch %d, Step %d, Cost %f, EuDis %f, Time %f, LoadTime %f" % (
                        pass_id, step_id, avg_loss_value[0], mean_dis, cost_time, data_load_costtime))
                data_load_time = time.time()

            avg_cost_test, avg_dist_test = train_test(val_prog, val_reader)

            print('Test with Epoch {0}, Loss {1:2.4}, EuDis {2:2.4}'.format(
                pass_id, avg_cost_test[0], avg_dist_test))

            # Keep a separate copy of the best model by validation distance.
            if avg_dist_test < best_dist:
                best_dist = avg_dist_test
                print("\nBest Dis, Checkpoint Saved!\n")
                save_params("_best/")

            # The rolling checkpoint is overwritten after every epoch.
            save_params("_checkpoint/")
    train_loop()

In [None]:
# Prerequisite: ImageNet-pretrained weights downloaded from
# https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/image_classification
# with ResNet34_pretrained/fc* removed.
train(
    use_cuda=True,
    params_dirname_prefix="../weights/loc_fcn",
    pretrained_model="../weights/ResNet34_pretrained",
    EPOCH_NUM=40,
)