Add AutoAugment (#309)

* add autoaugment, which is validated on faster-rcnn

Add AutoAugment (#309)
* add autoaugment, which is validated on faster-rcnn
baecabee · littletomatodonkey · GitHub · ad603173 · baecabee · baecabee
5 changed file
--- a/configs/autoaugment/README.md
+++ b/configs/autoaugment/README.md
+# Learning Data Augmentation Strategies for Object Detection
+## Introduction
+- Learning Data Augmentation Strategies for Object Detection: [https://arxiv.org/abs/1906.11172](https://arxiv.org/abs/1906.11172)
+```
+@article{Zoph2019LearningDA,
+  title={Learning Data Augmentation Strategies for Object Detection},
+  author={Barret Zoph and Ekin Dogus Cubuk and Golnaz Ghiasi and Tsung-Yi Lin and Jonathon Shlens and Quoc V. Le},
+  journal={ArXiv},
+  year={2019},
+  volume={abs/1906.11172}
+}
+```
+## Model Zoo
+| Backbone                | Type     | AutoAug policy | Image/gpu | Lr schd | Inf time (fps) | Box AP | Mask AP |                           Download                           |
+| :---------------------- | :-------------:| :-------: | :-------: | :-----: | :------------: | :----: | :-----: | :----------------------------------------------------------: |
+| ResNet50-vd-FPN         | Faster     |   v1 |  2     |   3x    |     22.800     |  39.9  |    -    | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_fpn_aa_3x.tar) |
+| ResNet101-vd-FPN         | Faster     |   v1 |  2     |   3x    |     17.652     |  42.5  |    -    | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_vd_fpn_aa_3x.tar) |
--- a/configs/autoaugment/faster_rcnn_r101_vd_fpn_aa_3x.yml
+++ b/configs/autoaugment/faster_rcnn_r101_vd_fpn_aa_3x.yml
+architecture: FasterRCNN
+max_iters: 270000
+snapshot_iter: 30000
+use_gpu: true
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar
+weights: output/faster_rcnn_r101_vd_fpn_aa_3x/model_final
+metric: COCO
+num_classes: 81
+FasterRCNN:
+  backbone: ResNet
+  fpn: FPN
+  rpn_head: FPNRPNHead
+  roi_extractor: FPNRoIAlign
+  bbox_head: BBoxHead
+  bbox_assigner: BBoxAssigner
+ResNet:
+  depth: 101
+  feature_maps: [2, 3, 4, 5]
+  freeze_at: 2
+  norm_type: bn
+  variant: d
+FPN:
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+FPNRPNHead:
+  anchor_generator:
+    anchor_sizes: [32, 64, 128, 256, 512]
+    aspect_ratios: [0.5, 1.0, 2.0]
+    stride: [16.0, 16.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  anchor_start_size: 32
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_negative_overlap: 0.3
+    rpn_positive_overlap: 0.7
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 2000
+    pre_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 1000
+    pre_nms_top_n: 1000
+FPNRoIAlign:
+  canconical_level: 4
+  canonical_size: 224
+  max_level: 5
+  min_level: 2
+  box_resolution: 7
+  sampling_ratio: 2
+BBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_hi: 0.5
+  bg_thresh_lo: 0.0
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+BBoxHead:
+  head: TwoFCHead
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    score_threshold: 0.05
+TwoFCHead:
+  mlp_dim: 1024
+LearningRate:
+  base_lr: 0.02
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [180000, 240000]
+  - !LinearWarmup
+    start_factor: 0.1
+    steps: 1000
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+_READER_: '../faster_fpn_reader.yml'
+TrainReader:
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: true
+  - !RandomFlipImage
+    prob: 0.5
+  - !AutoAugmentImage
+    autoaug_type: v1
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: true
+    mean: [0.485,0.456,0.406]
+    std: [0.229, 0.224,0.225]
+  - !ResizeImage
+    target_size: 800
+    max_size: 1333
+    interp: 1
+    use_cv2: true
+  - !Permute
+    to_bgr: false
+    channel_first: true
+  batch_size: 2
+  use_process: true
--- a/configs/autoaugment/faster_rcnn_r50_vd_fpn_aa_3x.yml
+++ b/configs/autoaugment/faster_rcnn_r50_vd_fpn_aa_3x.yml
+architecture: FasterRCNN
+max_iters: 270000
+snapshot_iter: 30000
+use_gpu: true
+log_smooth_window: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar
+weights: output/faster_rcnn_r50_vd_fpn_aa_3x/model_final
+metric: COCO
+num_classes: 81
+FasterRCNN:
+  backbone: ResNet
+  fpn: FPN
+  rpn_head: FPNRPNHead
+  roi_extractor: FPNRoIAlign
+  bbox_head: BBoxHead
+  bbox_assigner: BBoxAssigner
+ResNet:
+  depth: 50
+  feature_maps: [2, 3, 4, 5]
+  freeze_at: 2
+  norm_type: bn
+  variant: d
+FPN:
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
+FPNRPNHead:
+  anchor_generator:
+    anchor_sizes: [32, 64, 128, 256, 512]
+    aspect_ratios: [0.5, 1.0, 2.0]
+    stride: [16.0, 16.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  anchor_start_size: 32
+  max_level: 6
+  min_level: 2
+  num_chan: 256
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_negative_overlap: 0.3
+    rpn_positive_overlap: 0.7
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 2000
+    pre_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    post_nms_top_n: 1000
+    pre_nms_top_n: 1000
+FPNRoIAlign:
+  canconical_level: 4
+  canonical_size: 224
+  max_level: 5
+  min_level: 2
+  box_resolution: 7
+  sampling_ratio: 2
+BBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_hi: 0.5
+  bg_thresh_lo: 0.0
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+BBoxHead:
+  head: TwoFCHead
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    score_threshold: 0.05
+TwoFCHead:
+  mlp_dim: 1024
+LearningRate:
+  base_lr: 0.02
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [180000, 240000]
+  - !LinearWarmup
+    start_factor: 0.1
+    steps: 1000
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+_READER_: '../faster_fpn_reader.yml'
+TrainReader:
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: true
+  - !RandomFlipImage
+    prob: 0.5
+  - !AutoAugmentImage
+    autoaug_type: v1
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: true
+    mean: [0.485,0.456,0.406]
+    std: [0.229, 0.224,0.225]
+  - !ResizeImage
+    target_size: 800
+    max_size: 1333
+    interp: 1
+    use_cv2: true
+  - !Permute
+    to_bgr: false
+    channel_first: true
+  batch_size: 2
+  use_process: true
--- a/ppdet/data/transform/autoaugment_utils.py
+++ b/ppdet/data/transform/autoaugment_utils.py
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Reference: 
+#   https://github.com/tensorflow/tpu/blob/master/models/official/detection/utils/autoaugment_utils.py
+"""AutoAugment util file."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import inspect
+import math
+from PIL import Image, ImageEnhance
+import numpy as np
+import os
+import sys
+import cv2
+from copy import deepcopy
+# This signifies the max integer that the controller RNN could predict for the
+# augmentation scheme.
+_MAX_LEVEL = 10.
+# Represents an invalid bounding box that is used for checking for padding
+# lists of bounding box coordinates for a few augmentation operations
+_INVALID_BOX = [[-1.0, -1.0, -1.0, -1.0]]
+def policy_v0():
+    """Autoaugment policy that was used in AutoAugment Detection Paper."""
+    # Each tuple is an augmentation operation of the form
+    # (operation, probability, magnitude). Each element in policy is a
+    # sub-policy that will be applied sequentially on the image.
+    policy = [
+        [('TranslateX_BBox', 0.6, 4), ('Equalize', 0.8, 10)],
+        [('TranslateY_Only_BBoxes', 0.2, 2), ('Cutout', 0.8, 8)],
+        [('Sharpness', 0.0, 8), ('ShearX_BBox', 0.4, 0)],
+        [('ShearY_BBox', 1.0, 2), ('TranslateY_Only_BBoxes', 0.6, 6)],
+        [('Rotate_BBox', 0.6, 10), ('Color', 1.0, 6)],
+    ]
+    return policy
+def policy_v1():
+    """Autoaugment policy that was used in AutoAugment Detection Paper."""
+    # Each tuple is an augmentation operation of the form
+    # (operation, probability, magnitude). Each element in policy is a
+    # sub-policy that will be applied sequentially on the image.
+    policy = [
+        [('TranslateX_BBox', 0.6, 4), ('Equalize', 0.8, 10)],
+        [('TranslateY_Only_BBoxes', 0.2, 2), ('Cutout', 0.8, 8)],
+        [('Sharpness', 0.0, 8), ('ShearX_BBox', 0.4, 0)],
+        [('ShearY_BBox', 1.0, 2), ('TranslateY_Only_BBoxes', 0.6, 6)],
+        [('Rotate_BBox', 0.6, 10), ('Color', 1.0, 6)],
+        [('Color', 0.0, 0), ('ShearX_Only_BBoxes', 0.8, 4)],
+        [('ShearY_Only_BBoxes', 0.8, 2), ('Flip_Only_BBoxes', 0.0, 10)],
+        [('Equalize', 0.6, 10), ('TranslateX_BBox', 0.2, 2)],
+        [('Color', 1.0, 10), ('TranslateY_Only_BBoxes', 0.4, 6)],
+        [('Rotate_BBox', 0.8, 10), ('Contrast', 0.0, 10)],  # , 
+        [('Cutout', 0.2, 2), ('Brightness', 0.8, 10)],
+        [('Color', 1.0, 6), ('Equalize', 1.0, 2)],
+        [('Cutout_Only_BBoxes', 0.4, 6), ('TranslateY_Only_BBoxes', 0.8, 2)],
+        [('Color', 0.2, 8), ('Rotate_BBox', 0.8, 10)],
+        [('Sharpness', 0.4, 4), ('TranslateY_Only_BBoxes', 0.0, 4)],
+        [('Sharpness', 1.0, 4), ('SolarizeAdd', 0.4, 4)],
+        [('Rotate_BBox', 1.0, 8), ('Sharpness', 0.2, 8)],
+        [('ShearY_BBox', 0.6, 10), ('Equalize_Only_BBoxes', 0.6, 8)],
+        [('ShearX_BBox', 0.2, 6), ('TranslateY_Only_BBoxes', 0.2, 10)],
+        [('SolarizeAdd', 0.6, 8), ('Brightness', 0.8, 10)],
+    ]
+    return policy
+def policy_vtest():
+    """Autoaugment test policy for debugging."""
+    # Each tuple is an augmentation operation of the form
+    # (operation, probability, magnitude). Each element in policy is a
+    # sub-policy that will be applied sequentially on the image.
+    policy = [[('TranslateX_BBox', 1.0, 4), ('Equalize', 1.0, 10)], ]
+    return policy
+def policy_v2():
+    """Additional policy that performs well on object detection."""
+    # Each tuple is an augmentation operation of the form
+    # (operation, probability, magnitude). Each element in policy is a
+    # sub-policy that will be applied sequentially on the image.
+    policy = [
+        [('Color', 0.0, 6), ('Cutout', 0.6, 8), ('Sharpness', 0.4, 8)],
+        [('Rotate_BBox', 0.4, 8), ('Sharpness', 0.4, 2),
+         ('Rotate_BBox', 0.8, 10)],
+        [('TranslateY_BBox', 1.0, 8), ('AutoContrast', 0.8, 2)],
+        [('AutoContrast', 0.4, 6), ('ShearX_BBox', 0.8, 8),
+         ('Brightness', 0.0, 10)],
+        [('SolarizeAdd', 0.2, 6), ('Contrast', 0.0, 10),
+         ('AutoContrast', 0.6, 0)],
+        [('Cutout', 0.2, 0), ('Solarize', 0.8, 8), ('Color', 1.0, 4)],
+        [('TranslateY_BBox', 0.0, 4), ('Equalize', 0.6, 8),
+         ('Solarize', 0.0, 10)],
+        [('TranslateY_BBox', 0.2, 2), ('ShearY_BBox', 0.8, 8),
+         ('Rotate_BBox', 0.8, 8)],
+        [('Cutout', 0.8, 8), ('Brightness', 0.8, 8), ('Cutout', 0.2, 2)],
+        [('Color', 0.8, 4), ('TranslateY_BBox', 1.0, 6),
+         ('Rotate_BBox', 0.6, 6)],
+        [('Rotate_BBox', 0.6, 10), ('BBox_Cutout', 1.0, 4), ('Cutout', 0.2, 8)],
+        [('Rotate_BBox', 0.0, 0), ('Equalize', 0.6, 6),
+         ('ShearY_BBox', 0.6, 8)],
+        [('Brightness', 0.8, 8), ('AutoContrast', 0.4, 2),
+         ('Brightness', 0.2, 2)],
+        [('TranslateY_BBox', 0.4, 8), ('Solarize', 0.4, 6),
+         ('SolarizeAdd', 0.2, 10)],
+        [('Contrast', 1.0, 10), ('SolarizeAdd', 0.2, 8), ('Equalize', 0.2, 4)],
+    ]
+    return policy
+def policy_v3():
+    """"Additional policy that performs well on object detection."""
+    # Each tuple is an augmentation operation of the form
+    # (operation, probability, magnitude). Each element in policy is a
+    # sub-policy that will be applied sequentially on the image.
+    policy = [
+        [('Posterize', 0.8, 2), ('TranslateX_BBox', 1.0, 8)],
+        [('BBox_Cutout', 0.2, 10), ('Sharpness', 1.0, 8)],
+        [('Rotate_BBox', 0.6, 8), ('Rotate_BBox', 0.8, 10)],
+        [('Equalize', 0.8, 10), ('AutoContrast', 0.2, 10)],
+        [('SolarizeAdd', 0.2, 2), ('TranslateY_BBox', 0.2, 8)],
+        [('Sharpness', 0.0, 2), ('Color', 0.4, 8)],
+        [('Equalize', 1.0, 8), ('TranslateY_BBox', 1.0, 8)],
+        [('Posterize', 0.6, 2), ('Rotate_BBox', 0.0, 10)],
+        [('AutoContrast', 0.6, 0), ('Rotate_BBox', 1.0, 6)],
+        [('Equalize', 0.0, 4), ('Cutout', 0.8, 10)],
+        [('Brightness', 1.0, 2), ('TranslateY_BBox', 1.0, 6)],
+        [('Contrast', 0.0, 2), ('ShearY_BBox', 0.8, 0)],
+        [('AutoContrast', 0.8, 10), ('Contrast', 0.2, 10)],
+        [('Rotate_BBox', 1.0, 10), ('Cutout', 1.0, 10)],
+        [('SolarizeAdd', 0.8, 6), ('Equalize', 0.8, 8)],
+    ]
+    return policy
+def _equal(val1, val2, eps=1e-8):
+    return abs(val1 - val2) <= eps
+def blend(image1, image2, factor):
+    """Blend image1 and image2 using 'factor'.
+    Factor can be above 0.0.    A value of 0.0 means only image1 is used.
+    A value of 1.0 means only image2 is used.    A value between 0.0 and
+    1.0 means we linearly interpolate the pixel values between the two
+    images.    A value greater than 1.0 "extrapolates" the difference
+    between the two pixel values, and we clip the results to values
+    between 0 and 255.
+    Args:
+        image1: An image Tensor of type uint8.
+        image2: An image Tensor of type uint8.
+        factor: A floating point value above 0.0.
+    Returns:
+        A blended image Tensor of type uint8.
+    """
+    if factor == 0.0:
+        return image1
+    if factor == 1.0:
+        return image2
+    image1 = image1.astype(np.float32)
+    image2 = image2.astype(np.float32)
+    difference = image2 - image1
+    scaled = factor * difference
+    # Do addition in float.
+    temp = image1 + scaled
+    # Interpolate
+    if factor > 0.0 and factor < 1.0:
+        # Interpolation means we always stay within 0 and 255.
+        return temp.astype(np.uint8)
+    # Extrapolate:
+    #
+    # We need to clip and then cast.
+    return np.clip(temp, a_min=0, a_max=255).astype(np.uint8)
+def cutout(image, pad_size, replace=0):
+    """Apply cutout (https://arxiv.org/abs/1708.04552) to image.
+    This operation applies a (2*pad_size x 2*pad_size) mask of zeros to
+    a random location within `img`. The pixel values filled in will be of the
+    value `replace`. The located where the mask will be applied is randomly
+    chosen uniformly over the whole image.
+    Args:
+        image: An image Tensor of type uint8.
+        pad_size: Specifies how big the zero mask that will be generated is that
+            is applied to the image. The mask will be of size
+            (2*pad_size x 2*pad_size).
+        replace: What pixel value to fill in the image in the area that has
+            the cutout mask applied to it.
+    Returns:
+        An image Tensor that is of type uint8.
+    Example:
+        img = cv2.imread( "/home/vis/gry/train/img_data/test.jpg", cv2.COLOR_BGR2RGB )
+        new_img = cutout(img, pad_size=50, replace=0)
+    """
+    image_height, image_width = image.shape[0], image.shape[1]
+    cutout_center_height = np.random.randint(low=0, high=image_height)
+    cutout_center_width = np.random.randint(low=0, high=image_width)
+    lower_pad = np.maximum(0, cutout_center_height - pad_size)
+    upper_pad = np.maximum(0, image_height - cutout_center_height - pad_size)
+    left_pad = np.maximum(0, cutout_center_width - pad_size)
+    right_pad = np.maximum(0, image_width - cutout_center_width - pad_size)
+    cutout_shape = [
+        image_height - (lower_pad + upper_pad),
+        image_width - (left_pad + right_pad)
+    ]
+    padding_dims = [[lower_pad, upper_pad], [left_pad, right_pad]]
+    mask = np.pad(np.zeros(
+        cutout_shape, dtype=image.dtype),
+                  padding_dims,
+                  'constant',
+                  constant_values=1)
+    mask = np.expand_dims(mask, -1)
+    mask = np.tile(mask, [1, 1, 3])
+    image = np.where(
+        np.equal(mask, 0),
+        np.ones_like(
+            image, dtype=image.dtype) * replace,
+        image)
+    return image.astype(np.uint8)
+def solarize(image, threshold=128):
+    # For each pixel in the image, select the pixel
+    # if the value is less than the threshold.
+    # Otherwise, subtract 255 from the pixel.
+    return np.where(image < threshold, image, 255 - image)
+def solarize_add(image, addition=0, threshold=128):
+    # For each pixel in the image less than threshold
+    # we add 'addition' amount to it and then clip the
+    # pixel value to be between 0 and 255. The value
+    # of 'addition' is between -128 and 128.
+    added_image = image.astype(np.int64) + addition
+    added_image = np.clip(added_image, a_min=0, a_max=255).astype(np.uint8)
+    return np.where(image < threshold, added_image, image)
+def color(image, factor):
+    """use cv2 to deal"""
+    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    degenerate = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
+    return blend(degenerate, image, factor)
+# refer to https://github.com/4uiiurz1/pytorch-auto-augment/blob/024b2eac4140c38df8342f09998e307234cafc80/auto_augment.py#L197
+def contrast(img, factor):
+    img = ImageEnhance.Contrast(Image.fromarray(img)).enhance(factor)
+    return np.array(img)
+def brightness(image, factor):
+    """Equivalent of PIL Brightness."""
+    degenerate = np.zeros_like(image)
+    return blend(degenerate, image, factor)
+def posterize(image, bits):
+    """Equivalent of PIL Posterize."""
+    shift = 8 - bits
+    return np.left_shift(np.right_shift(image, shift), shift)
+def rotate(image, degrees, replace):
+    """Rotates the image by degrees either clockwise or counterclockwise.
+    Args:
+        image: An image Tensor of type uint8.
+        degrees: Float, a scalar angle in degrees to rotate all images by. If
+            degrees is positive the image will be rotated clockwise otherwise it will
+            be rotated counterclockwise.
+        replace: A one or three value 1D tensor to fill empty pixels caused by
+            the rotate operation.
+    Returns:
+        The rotated version of image.
+    """
+    image = wrap(image)
+    image = Image.fromarray(image)
+    image = image.rotate(degrees)
+    image = np.array(image, dtype=np.uint8)
+    return unwrap(image, replace)
+def random_shift_bbox(image,
+                      bbox,
+                      pixel_scaling,
+                      replace,
+                      new_min_bbox_coords=None):
+    """Move the bbox and the image content to a slightly new random location.
+    Args:
+        image: 3D uint8 Tensor.
+        bbox: 1D Tensor that has 4 elements (min_y, min_x, max_y, max_x)
+            of type float that represents the normalized coordinates between 0 and 1.
+            The potential values for the new min corner of the bbox will be between
+            [old_min - pixel_scaling * bbox_height/2,
+             old_min - pixel_scaling * bbox_height/2].
+        pixel_scaling: A float between 0 and 1 that specifies the pixel range
+            that the new bbox location will be sampled from.
+        replace: A one or three value 1D tensor to fill empty pixels.
+        new_min_bbox_coords: If not None, then this is a tuple that specifies the
+            (min_y, min_x) coordinates of the new bbox. Normally this is randomly
+            specified, but this allows it to be manually set. The coordinates are
+            the absolute coordinates between 0 and image height/width and are int32.
+    Returns:
+        The new image that will have the shifted bbox location in it along with
+        the new bbox that contains the new coordinates.
+    """
+    # Obtains image height and width and create helper clip functions.
+    image_height, image_width = image.shape[0], image.shape[1]
+    image_height = float(image_height)
+    image_width = float(image_width)
+    def clip_y(val):
+        return np.clip(val, a_min=0, a_max=image_height - 1).astype(np.int32)
+    def clip_x(val):
+        return np.clip(val, a_min=0, a_max=image_width - 1).astype(np.int32)
+    # Convert bbox to pixel coordinates.
+    min_y = int(image_height * bbox[0])
+    min_x = int(image_width * bbox[1])
+    max_y = clip_y(image_height * bbox[2])
+    max_x = clip_x(image_width * bbox[3])
+    bbox_height, bbox_width = (max_y - min_y + 1, max_x - min_x + 1)
+    image_height = int(image_height)
+    image_width = int(image_width)
+    # Select the new min/max bbox ranges that are used for sampling the
+    # new min x/y coordinates of the shifted bbox.
+    minval_y = clip_y(min_y - np.int32(pixel_scaling * float(bbox_height) /
+                                       2.0))
+    maxval_y = clip_y(min_y + np.int32(pixel_scaling * float(bbox_height) /
+                                       2.0))
+    minval_x = clip_x(min_x - np.int32(pixel_scaling * float(bbox_width) / 2.0))
+    maxval_x = clip_x(min_x + np.int32(pixel_scaling * float(bbox_width) / 2.0))
+    # Sample and calculate the new unclipped min/max coordinates of the new bbox.
+    if new_min_bbox_coords is None:
+        unclipped_new_min_y = np.random.randint(
+            low=minval_y, high=maxval_y, dtype=np.int32)
+        unclipped_new_min_x = np.random.randint(
+            low=minval_x, high=maxval_x, dtype=np.int32)
+    else:
+        unclipped_new_min_y, unclipped_new_min_x = (
+            clip_y(new_min_bbox_coords[0]), clip_x(new_min_bbox_coords[1]))
+    unclipped_new_max_y = unclipped_new_min_y + bbox_height - 1
+    unclipped_new_max_x = unclipped_new_min_x + bbox_width - 1
+    # Determine if any of the new bbox was shifted outside the current image.
+    # This is used for determining if any of the original bbox content should be
+    # discarded.
+    new_min_y, new_min_x, new_max_y, new_max_x = (
+        clip_y(unclipped_new_min_y), clip_x(unclipped_new_min_x),
+        clip_y(unclipped_new_max_y), clip_x(unclipped_new_max_x))
+    shifted_min_y = (new_min_y - unclipped_new_min_y) + min_y
+    shifted_max_y = max_y - (unclipped_new_max_y - new_max_y)
+    shifted_min_x = (new_min_x - unclipped_new_min_x) + min_x
+    shifted_max_x = max_x - (unclipped_new_max_x - new_max_x)
+    # Create the new bbox tensor by converting pixel integer values to floats.
+    new_bbox = np.stack([
+        float(new_min_y) / float(image_height), float(new_min_x) /
+        float(image_width), float(new_max_y) / float(image_height),
+        float(new_max_x) / float(image_width)
+    ])
+    # Copy the contents in the bbox and fill the old bbox location
+    # with gray (128).
+    bbox_content = image[shifted_min_y:shifted_max_y + 1, shifted_min_x:
+                         shifted_max_x + 1, :]
+    def mask_and_add_image(min_y_, min_x_, max_y_, max_x_, mask, content_tensor,
+                           image_):
+        """Applies mask to bbox region in image then adds content_tensor to it."""
+        mask = np.pad(mask, [[min_y_, (image_height - 1) - max_y_],
+                             [min_x_, (image_width - 1) - max_x_], [0, 0]],
+                      'constant',
+                      constant_values=1)
+        content_tensor = np.pad(content_tensor,
+                                [[min_y_, (image_height - 1) - max_y_],
+                                 [min_x_, (image_width - 1) - max_x_], [0, 0]],
+                                'constant',
+                                constant_values=0)
+        return image_ * mask + content_tensor
+    # Zero out original bbox location.
+    mask = np.zeros_like(image)[min_y:max_y + 1, min_x:max_x + 1, :]
+    grey_tensor = np.zeros_like(mask) + replace[0]
+    image = mask_and_add_image(min_y, min_x, max_y, max_x, mask, grey_tensor,
+                               image)
+    # Fill in bbox content to new bbox location.
+    mask = np.zeros_like(bbox_content)
+    image = mask_and_add_image(new_min_y, new_min_x, new_max_y, new_max_x, mask,
+                               bbox_content, image)
+    return image.astype(np.uint8), new_bbox
+def _clip_bbox(min_y, min_x, max_y, max_x):
+    """Clip bounding box coordinates between 0 and 1.
+    Args:
+        min_y: Normalized bbox coordinate of type float between 0 and 1.
+        min_x: Normalized bbox coordinate of type float between 0 and 1.
+        max_y: Normalized bbox coordinate of type float between 0 and 1.
+        max_x: Normalized bbox coordinate of type float between 0 and 1.
+    Returns:
+        Clipped coordinate values between 0 and 1.
+    """
+    min_y = np.clip(min_y, a_min=0, a_max=1.0)
+    min_x = np.clip(min_x, a_min=0, a_max=1.0)
+    max_y = np.clip(max_y, a_min=0, a_max=1.0)
+    max_x = np.clip(max_x, a_min=0, a_max=1.0)
+    return min_y, min_x, max_y, max_x
+def _check_bbox_area(min_y, min_x, max_y, max_x, delta=0.05):
+    """Adjusts bbox coordinates to make sure the area is > 0.
+    Args:
+        min_y: Normalized bbox coordinate of type float between 0 and 1.
+        min_x: Normalized bbox coordinate of type float between 0 and 1.
+        max_y: Normalized bbox coordinate of type float between 0 and 1.
+        max_x: Normalized bbox coordinate of type float between 0 and 1.
+        delta: Float, this is used to create a gap of size 2 * delta between
+            bbox min/max coordinates that are the same on the boundary.
+            This prevents the bbox from having an area of zero.
+    Returns:
+        Tuple of new bbox coordinates between 0 and 1 that will now have a
+        guaranteed area > 0.
+    """
+    height = max_y - min_y
+    width = max_x - min_x
+    def _adjust_bbox_boundaries(min_coord, max_coord):
+        # Make sure max is never 0 and min is never 1.
+        max_coord = np.maximum(max_coord, 0.0 + delta)
+        min_coord = np.minimum(min_coord, 1.0 - delta)
+        return min_coord, max_coord
+    if _equal(height, 0):
+        min_y, max_y = _adjust_bbox_boundaries(min_y, max_y)
+    if _equal(width, 0):
+        min_x, max_x = _adjust_bbox_boundaries(min_x, max_x)
+    return min_y, min_x, max_y, max_x
+def _scale_bbox_only_op_probability(prob):
+    """Reduce the probability of the bbox-only operation.
+    Probability is reduced so that we do not distort the content of too many
+    bounding boxes that are close to each other. The value of 3.0 was a chosen
+    hyper parameter when designing the autoaugment algorithm that we found
+    empirically to work well.
+    Args:
+        prob: Float that is the probability of applying the bbox-only operation.
+    Returns:
+        Reduced probability.
+    """
+    return prob / 3.0
+def _apply_bbox_augmentation(image, bbox, augmentation_func, *args):
+    """Applies augmentation_func to the subsection of image indicated by bbox.
+    Args:
+        image: 3D uint8 Tensor.
+        bbox: 1D Tensor that has 4 elements (min_y, min_x, max_y, max_x)
+            of type float that represents the normalized coordinates between 0 and 1.
+        augmentation_func: Augmentation function that will be applied to the
+            subsection of image.
+        *args: Additional parameters that will be passed into augmentation_func
+            when it is called.
+    Returns:
+        A modified version of image, where the bbox location in the image will
+        have `ugmentation_func applied to it.
+    """
+    image_height = image.shape[0]
+    image_width = image.shape[1]
+    min_y = int(image_height * bbox[0])
+    min_x = int(image_width * bbox[1])
+    max_y = int(image_height * bbox[2])
+    max_x = int(image_width * bbox[3])
+    # Clip to be sure the max values do not fall out of range.
+    max_y = np.minimum(max_y, image_height - 1)
+    max_x = np.minimum(max_x, image_width - 1)
+    # Get the sub-tensor that is the image within the bounding box region.
+    bbox_content = image[min_y:max_y + 1, min_x:max_x + 1, :]
+    # Apply the augmentation function to the bbox portion of the image.
+    augmented_bbox_content = augmentation_func(bbox_content, *args)
+    # Pad the augmented_bbox_content and the mask to match the shape of original
+    # image.
+    augmented_bbox_content = np.pad(
+        augmented_bbox_content, [[min_y, (image_height - 1) - max_y],
+                                 [min_x, (image_width - 1) - max_x], [0, 0]],
+        'constant',
+        constant_values=1)
+    # Create a mask that will be used to zero out a part of the original image.
+    mask_tensor = np.zeros_like(bbox_content)
+    mask_tensor = np.pad(mask_tensor,
+                         [[min_y, (image_height - 1) - max_y],
+                          [min_x, (image_width - 1) - max_x], [0, 0]],
+                         'constant',
+                         constant_values=1)
+    # Replace the old bbox content with the new augmented content.
+    image = image * mask_tensor + augmented_bbox_content
+    return image.astype(np.uint8)
+def _concat_bbox(bbox, bboxes):
+    """Helper function that concates bbox to bboxes along the first dimension."""
+    # Note if all elements in bboxes are -1 (_INVALID_BOX), then this means
+    # we discard bboxes and start the bboxes Tensor with the current bbox.
+    bboxes_sum_check = np.sum(bboxes)
+    bbox = np.expand_dims(bbox, 0)
+    # This check will be true when it is an _INVALID_BOX
+    if _equal(bboxes_sum_check, -4):
+        bboxes = bbox
+    else:
+        bboxes = np.concatenate([bboxes, bbox], 0)
+    return bboxes
+def _apply_bbox_augmentation_wrapper(image, bbox, new_bboxes, prob,
+                                     augmentation_func, func_changes_bbox,
+                                     *args):
+    """Applies _apply_bbox_augmentation with probability prob.
+    Args:
+        image: 3D uint8 Tensor.
+        bbox: 1D Tensor that has 4 elements (min_y, min_x, max_y, max_x)
+            of type float that represents the normalized coordinates between 0 and 1.
+        new_bboxes: 2D Tensor that is a list of the bboxes in the image after they
+            have been altered by aug_func. These will only be changed when
+            func_changes_bbox is set to true. Each bbox has 4 elements
+            (min_y, min_x, max_y, max_x) of type float that are the normalized
+            bbox coordinates between 0 and 1.
+        prob: Float that is the probability of applying _apply_bbox_augmentation.
+        augmentation_func: Augmentation function that will be applied to the
+            subsection of image.
+        func_changes_bbox: Boolean. Does augmentation_func return bbox in addition
+            to image.
+        *args: Additional parameters that will be passed into augmentation_func
+            when it is called.
+    Returns:
+        A tuple. Fist element is a modified version of image, where the bbox
+        location in the image will have augmentation_func applied to it if it is
+        chosen to be called with probability `prob`. The second element is a
+        Tensor of Tensors of length 4 that will contain the altered bbox after
+        applying augmentation_func.
+    """
+    should_apply_op = (np.random.rand() + prob >= 1)
+    if func_changes_bbox:
+        if should_apply_op:
+            augmented_image, bbox = augmentation_func(image, bbox, *args)
+        else:
+            augmented_image, bbox = (image, bbox)
+    else:
+        if should_apply_op:
+            augmented_image = _apply_bbox_augmentation(image, bbox,
+                                                       augmentation_func, *args)
+        else:
+            augmented_image = image
+    new_bboxes = _concat_bbox(bbox, new_bboxes)
+    return augmented_image.astype(np.uint8), new_bboxes
+def _apply_multi_bbox_augmentation(image, bboxes, prob, aug_func,
+                                   func_changes_bbox, *args):
+    """Applies aug_func to the image for each bbox in bboxes.
+    Args:
+        image: 3D uint8 Tensor.
+        bboxes: 2D Tensor that is a list of the bboxes in the image. Each bbox
+            has 4 elements (min_y, min_x, max_y, max_x) of type float.
+        prob: Float that is the probability of applying aug_func to a specific
+            bounding box within the image.
+        aug_func: Augmentation function that will be applied to the
+            subsections of image indicated by the bbox values in bboxes.
+        func_changes_bbox: Boolean. Does augmentation_func return bbox in addition
+            to image.
+        *args: Additional parameters that will be passed into augmentation_func
+            when it is called.
+    Returns:
+        A modified version of image, where each bbox location in the image will
+        have augmentation_func applied to it if it is chosen to be called with
+        probability prob independently across all bboxes. Also the final
+        bboxes are returned that will be unchanged if func_changes_bbox is set to
+        false and if true, the new altered ones will be returned.
+    """
+    # Will keep track of the new altered bboxes after aug_func is repeatedly
+    # applied. The -1 values are a dummy value and this first Tensor will be
+    # removed upon appending the first real bbox.
+    new_bboxes = np.array(_INVALID_BOX)
+    # If the bboxes are empty, then just give it _INVALID_BOX. The result
+    # will be thrown away.
+    bboxes = np.array((_INVALID_BOX)) if bboxes.size == 0 else bboxes
+    assert bboxes.shape[1] == 4, "bboxes.shape[1] must be 4!!!!"
+    # pylint:disable=g-long-lambda
+    # pylint:disable=line-too-long
+    wrapped_aug_func = lambda _image, bbox, _new_bboxes: _apply_bbox_augmentation_wrapper(_image, bbox, _new_bboxes, prob, aug_func, func_changes_bbox, *args)
+    # pylint:enable=g-long-lambda
+    # pylint:enable=line-too-long
+    # Setup the while_loop.
+    num_bboxes = bboxes.shape[0]  # We loop until we go over all bboxes.
+    idx = 0  # Counter for the while loop.
+    # Conditional function when to end the loop once we go over all bboxes
+    # images_and_bboxes contain (_image, _new_bboxes)
+    def cond(_idx, _images_and_bboxes):
+        return _idx < num_bboxes
+    # Shuffle the bboxes so that the augmentation order is not deterministic if
+    # we are not changing the bboxes with aug_func.
+    # if not func_changes_bbox:
+    #     print(bboxes)
+    #     loop_bboxes = np.take(bboxes,np.random.permutation(bboxes.shape[0]),axis=0)
+    #     print(loop_bboxes)
+    # else:
+    #     loop_bboxes = bboxes
+    # we can not shuffle the bbox because it does not contain class information here
+    loop_bboxes = deepcopy(bboxes)
+    # Main function of while_loop where we repeatedly apply augmentation on the
+    # bboxes in the image.
+    # pylint:disable=g-long-lambda
+    body = lambda _idx, _images_and_bboxes: [
+            _idx + 1, wrapped_aug_func(_images_and_bboxes[0],
+                                         loop_bboxes[_idx],
+                                         _images_and_bboxes[1])]
+    while (cond(idx, (image, new_bboxes))):
+        idx, (image, new_bboxes) = body(idx, (image, new_bboxes))
+    # Either return the altered bboxes or the original ones depending on if
+    # we altered them in anyway.
+    if func_changes_bbox:
+        final_bboxes = new_bboxes
+    else:
+        final_bboxes = bboxes
+    return image, final_bboxes
+def _apply_multi_bbox_augmentation_wrapper(image, bboxes, prob, aug_func,
+                                           func_changes_bbox, *args):
+    """Checks to be sure num bboxes > 0 before calling inner function."""
+    num_bboxes = len(bboxes)
+    new_image = deepcopy(image)
+    new_bboxes = deepcopy(bboxes)
+    if num_bboxes != 0:
+        new_image, new_bboxes = _apply_multi_bbox_augmentation(
+            new_image, new_bboxes, prob, aug_func, func_changes_bbox, *args)
+    return new_image, new_bboxes
+def rotate_only_bboxes(image, bboxes, prob, degrees, replace):
+    """Apply rotate to each bbox in the image with probability prob."""
+    func_changes_bbox = False
+    prob = _scale_bbox_only_op_probability(prob)
+    return _apply_multi_bbox_augmentation_wrapper(
+        image, bboxes, prob, rotate, func_changes_bbox, degrees, replace)
+def shear_x_only_bboxes(image, bboxes, prob, level, replace):
+    """Apply shear_x to each bbox in the image with probability prob."""
+    func_changes_bbox = False
+    prob = _scale_bbox_only_op_probability(prob)
+    return _apply_multi_bbox_augmentation_wrapper(
+        image, bboxes, prob, shear_x, func_changes_bbox, level, replace)
+def shear_y_only_bboxes(image, bboxes, prob, level, replace):
+    """Apply shear_y to each bbox in the image with probability prob."""
+    func_changes_bbox = False
+    prob = _scale_bbox_only_op_probability(prob)
+    return _apply_multi_bbox_augmentation_wrapper(
+        image, bboxes, prob, shear_y, func_changes_bbox, level, replace)
+def translate_x_only_bboxes(image, bboxes, prob, pixels, replace):
+    """Apply translate_x to each bbox in the image with probability prob."""
+    func_changes_bbox = False
+    prob = _scale_bbox_only_op_probability(prob)
+    return _apply_multi_bbox_augmentation_wrapper(
+        image, bboxes, prob, translate_x, func_changes_bbox, pixels, replace)
+def translate_y_only_bboxes(image, bboxes, prob, pixels, replace):
+    """Apply translate_y to each bbox in the image with probability prob."""
+    func_changes_bbox = False
+    prob = _scale_bbox_only_op_probability(prob)
+    return _apply_multi_bbox_augmentation_wrapper(
+        image, bboxes, prob, translate_y, func_changes_bbox, pixels, replace)
+def flip_only_bboxes(image, bboxes, prob):
+    """Apply flip_lr to each bbox in the image with probability prob."""
+    func_changes_bbox = False
+    prob = _scale_bbox_only_op_probability(prob)
+    return _apply_multi_bbox_augmentation_wrapper(image, bboxes, prob,
+                                                  np.fliplr, func_changes_bbox)
+def solarize_only_bboxes(image, bboxes, prob, threshold):
+    """Apply solarize to each bbox in the image with probability prob."""
+    func_changes_bbox = False
+    prob = _scale_bbox_only_op_probability(prob)
+    return _apply_multi_bbox_augmentation_wrapper(image, bboxes, prob, solarize,
+                                                  func_changes_bbox, threshold)
+def equalize_only_bboxes(image, bboxes, prob):
+    """Apply equalize to each bbox in the image with probability prob."""
+    func_changes_bbox = False
+    prob = _scale_bbox_only_op_probability(prob)
+    return _apply_multi_bbox_augmentation_wrapper(image, bboxes, prob, equalize,
+                                                  func_changes_bbox)
+def cutout_only_bboxes(image, bboxes, prob, pad_size, replace):
+    """Apply cutout to each bbox in the image with probability prob."""
+    func_changes_bbox = False
+    prob = _scale_bbox_only_op_probability(prob)
+    return _apply_multi_bbox_augmentation_wrapper(
+        image, bboxes, prob, cutout, func_changes_bbox, pad_size, replace)
+def _rotate_bbox(bbox, image_height, image_width, degrees):
+    """Rotates the bbox coordinated by degrees.
+    Args:
+        bbox: 1D Tensor that has 4 elements (min_y, min_x, max_y, max_x)
+            of type float that represents the normalized coordinates between 0 and 1.
+        image_height: Int, height of the image.
+        image_width: Int, height of the image.
+        degrees: Float, a scalar angle in degrees to rotate all images by. If
+            degrees is positive the image will be rotated clockwise otherwise it will
+            be rotated counterclockwise.
+    Returns:
+        A tensor of the same shape as bbox, but now with the rotated coordinates.
+    """
+    image_height, image_width = (float(image_height), float(image_width))
+    # Convert from degrees to radians.
+    degrees_to_radians = math.pi / 180.0
+    radians = degrees * degrees_to_radians
+    # Translate the bbox to the center of the image and turn the normalized 0-1
+    # coordinates to absolute pixel locations.
+    # Y coordinates are made negative as the y axis of images goes down with
+    # increasing pixel values, so we negate to make sure x axis and y axis points
+    # are in the traditionally positive direction.
+    min_y = -int(image_height * (bbox[0] - 0.5))
+    min_x = int(image_width * (bbox[1] - 0.5))
+    max_y = -int(image_height * (bbox[2] - 0.5))
+    max_x = int(image_width * (bbox[3] - 0.5))
+    coordinates = np.stack([[min_y, min_x], [min_y, max_x], [max_y, min_x],
+                            [max_y, max_x]]).astype(np.float32)
+    # Rotate the coordinates according to the rotation matrix clockwise if
+    # radians is positive, else negative
+    rotation_matrix = np.stack([[math.cos(radians), math.sin(radians)],
+                                [-math.sin(radians), math.cos(radians)]])
+    new_coords = np.matmul(rotation_matrix,
+                           np.transpose(coordinates)).astype(np.int32)
+    # Find min/max values and convert them back to normalized 0-1 floats.
+    min_y = -(float(np.max(new_coords[0, :])) / image_height - 0.5)
+    min_x = float(np.min(new_coords[1, :])) / image_width + 0.5
+    max_y = -(float(np.min(new_coords[0, :])) / image_height - 0.5)
+    max_x = float(np.max(new_coords[1, :])) / image_width + 0.5
+    # Clip the bboxes to be sure the fall between [0, 1].
+    min_y, min_x, max_y, max_x = _clip_bbox(min_y, min_x, max_y, max_x)
+    min_y, min_x, max_y, max_x = _check_bbox_area(min_y, min_x, max_y, max_x)
+    return np.stack([min_y, min_x, max_y, max_x])
+def rotate_with_bboxes(image, bboxes, degrees, replace):
+    # Rotate the image.
+    image = rotate(image, degrees, replace)
+    # Convert bbox coordinates to pixel values.
+    image_height, image_width = image.shape[:2]
+    # pylint:disable=g-long-lambda
+    wrapped_rotate_bbox = lambda bbox: _rotate_bbox(bbox, image_height, image_width, degrees)
+    # pylint:enable=g-long-lambda
+    new_bboxes = np.zeros_like(bboxes)
+    for idx in range(len(bboxes)):
+        new_bboxes[idx] = wrapped_rotate_bbox(bboxes[idx])
+    return image, new_bboxes
+def translate_x(image, pixels, replace):
+    """Equivalent of PIL Translate in X dimension."""
+    image = Image.fromarray(wrap(image))
+    image = image.transform(image.size, Image.AFFINE, (1, 0, pixels, 0, 1, 0))
+    return unwrap(np.array(image), replace)
+def translate_y(image, pixels, replace):
+    """Equivalent of PIL Translate in Y dimension."""
+    image = Image.fromarray(wrap(image))
+    image = image.transform(image.size, Image.AFFINE, (1, 0, 0, 0, 1, pixels))
+    return unwrap(np.array(image), replace)
+def _shift_bbox(bbox, image_height, image_width, pixels, shift_horizontal):
+    """Shifts the bbox coordinates by pixels.
+    Args:
+        bbox: 1D Tensor that has 4 elements (min_y, min_x, max_y, max_x)
+            of type float that represents the normalized coordinates between 0 and 1.
+        image_height: Int, height of the image.
+        image_width: Int, width of the image.
+        pixels: An int. How many pixels to shift the bbox.
+        shift_horizontal: Boolean. If true then shift in X dimension else shift in
+            Y dimension.
+    Returns:
+        A tensor of the same shape as bbox, but now with the shifted coordinates.
+    """
+    pixels = int(pixels)
+    # Convert bbox to integer pixel locations.
+    min_y = int(float(image_height) * bbox[0])
+    min_x = int(float(image_width) * bbox[1])
+    max_y = int(float(image_height) * bbox[2])
+    max_x = int(float(image_width) * bbox[3])
+    if shift_horizontal:
+        min_x = np.maximum(0, min_x - pixels)
+        max_x = np.minimum(image_width, max_x - pixels)
+    else:
+        min_y = np.maximum(0, min_y - pixels)
+        max_y = np.minimum(image_height, max_y - pixels)
+    # Convert bbox back to floats.
+    min_y = float(min_y) / float(image_height)
+    min_x = float(min_x) / float(image_width)
+    max_y = float(max_y) / float(image_height)
+    max_x = float(max_x) / float(image_width)
+    # Clip the bboxes to be sure the fall between [0, 1].
+    min_y, min_x, max_y, max_x = _clip_bbox(min_y, min_x, max_y, max_x)
+    min_y, min_x, max_y, max_x = _check_bbox_area(min_y, min_x, max_y, max_x)
+    return np.stack([min_y, min_x, max_y, max_x])
+def translate_bbox(image, bboxes, pixels, replace, shift_horizontal):
+    """Equivalent of PIL Translate in X/Y dimension that shifts image and bbox.
+    Args:
+        image: 3D uint8 Tensor.
+        bboxes: 2D Tensor that is a list of the bboxes in the image. Each bbox
+            has 4 elements (min_y, min_x, max_y, max_x) of type float with values
+            between [0, 1].
+        pixels: An int. How many pixels to shift the image and bboxes
+        replace: A one or three value 1D tensor to fill empty pixels.
+        shift_horizontal: Boolean. If true then shift in X dimension else shift in
+            Y dimension.
+    Returns:
+        A tuple containing a 3D uint8 Tensor that will be the result of translating
+        image by pixels. The second element of the tuple is bboxes, where now
+        the coordinates will be shifted to reflect the shifted image.
+    """
+    if shift_horizontal:
+        image = translate_x(image, pixels, replace)
+    else:
+        image = translate_y(image, pixels, replace)
+    # Convert bbox coordinates to pixel values.
+    image_height, image_width = image.shape[0], image.shape[1]
+    # pylint:disable=g-long-lambda
+    wrapped_shift_bbox = lambda bbox: _shift_bbox(bbox, image_height, image_width, pixels, shift_horizontal)
+    # pylint:enable=g-long-lambda
+    new_bboxes = deepcopy(bboxes)
+    num_bboxes = len(bboxes)
+    for idx in range(num_bboxes):
+        new_bboxes[idx] = wrapped_shift_bbox(bboxes[idx])
+    return image.astype(np.uint8), new_bboxes
+def shear_x(image, level, replace):
+    """Equivalent of PIL Shearing in X dimension."""
+    # Shear parallel to x axis is a projective transform
+    # with a matrix form of:
+    # [1    level
+    #    0    1].
+    image = Image.fromarray(wrap(image))
+    image = image.transform(image.size, Image.AFFINE, (1, level, 0, 0, 1, 0))
+    return unwrap(np.array(image), replace)
+def shear_y(image, level, replace):
+    """Equivalent of PIL Shearing in Y dimension."""
+    # Shear parallel to y axis is a projective transform
+    # with a matrix form of:
+    # [1    0
+    #    level    1].
+    image = Image.fromarray(wrap(image))
+    image = image.transform(image.size, Image.AFFINE, (1, 0, 0, level, 1, 0))
+    return unwrap(np.array(image), replace)
+def _shear_bbox(bbox, image_height, image_width, level, shear_horizontal):
+    """Shifts the bbox according to how the image was sheared.
+    Args:
+        bbox: 1D Tensor that has 4 elements (min_y, min_x, max_y, max_x)
+            of type float that represents the normalized coordinates between 0 and 1.
+        image_height: Int, height of the image.
+        image_width: Int, height of the image.
+        level: Float. How much to shear the image.
+        shear_horizontal: If true then shear in X dimension else shear in
+            the Y dimension.
+    Returns:
+        A tensor of the same shape as bbox, but now with the shifted coordinates.
+    """
+    image_height, image_width = (float(image_height), float(image_width))
+    # Change bbox coordinates to be pixels.
+    min_y = int(image_height * bbox[0])
+    min_x = int(image_width * bbox[1])
+    max_y = int(image_height * bbox[2])
+    max_x = int(image_width * bbox[3])
+    coordinates = np.stack(
+        [[min_y, min_x], [min_y, max_x], [max_y, min_x], [max_y, max_x]])
+    coordinates = coordinates.astype(np.float32)
+    # Shear the coordinates according to the translation matrix.
+    if shear_horizontal:
+        translation_matrix = np.stack([[1, 0], [-level, 1]])
+    else:
+        translation_matrix = np.stack([[1, -level], [0, 1]])
+    translation_matrix = translation_matrix.astype(np.float32)
+    new_coords = np.matmul(translation_matrix,
+                           np.transpose(coordinates)).astype(np.int32)
+    # Find min/max values and convert them back to floats.
+    min_y = float(np.min(new_coords[0, :])) / image_height
+    min_x = float(np.min(new_coords[1, :])) / image_width
+    max_y = float(np.max(new_coords[0, :])) / image_height
+    max_x = float(np.max(new_coords[1, :])) / image_width
+    # Clip the bboxes to be sure the fall between [0, 1].
+    min_y, min_x, max_y, max_x = _clip_bbox(min_y, min_x, max_y, max_x)
+    min_y, min_x, max_y, max_x = _check_bbox_area(min_y, min_x, max_y, max_x)
+    return np.stack([min_y, min_x, max_y, max_x])
+def shear_with_bboxes(image, bboxes, level, replace, shear_horizontal):
+    """Applies Shear Transformation to the image and shifts the bboxes.
+    Args:
+        image: 3D uint8 Tensor.
+        bboxes: 2D Tensor that is a list of the bboxes in the image. Each bbox
+            has 4 elements (min_y, min_x, max_y, max_x) of type float with values
+            between [0, 1].
+        level: Float. How much to shear the image. This value will be between
+            -0.3 to 0.3.
+        replace: A one or three value 1D tensor to fill empty pixels.
+        shear_horizontal: Boolean. If true then shear in X dimension else shear in
+            the Y dimension.
+    Returns:
+        A tuple containing a 3D uint8 Tensor that will be the result of shearing
+        image by level. The second element of the tuple is bboxes, where now
+        the coordinates will be shifted to reflect the sheared image.
+    """
+    if shear_horizontal:
+        image = shear_x(image, level, replace)
+    else:
+        image = shear_y(image, level, replace)
+    # Convert bbox coordinates to pixel values.
+    image_height, image_width = image.shape[:2]
+    # pylint:disable=g-long-lambda
+    wrapped_shear_bbox = lambda bbox: _shear_bbox(bbox, image_height, image_width, level, shear_horizontal)
+    # pylint:enable=g-long-lambda
+    new_bboxes = deepcopy(bboxes)
+    num_bboxes = len(bboxes)
+    for idx in range(num_bboxes):
+        new_bboxes[idx] = wrapped_shear_bbox(bboxes[idx])
+    return image.astype(np.uint8), new_bboxes
+def autocontrast(image):
+    """Implements Autocontrast function from PIL.
+    Args:
+        image: A 3D uint8 tensor.
+    Returns:
+        The image after it has had autocontrast applied to it and will be of type
+        uint8.
+    """
+    def scale_channel(image):
+        """Scale the 2D image using the autocontrast rule."""
+        # A possibly cheaper version can be done using cumsum/unique_with_counts
+        # over the histogram values, rather than iterating over the entire image.
+        # to compute mins and maxes.
+        lo = float(np.min(image))
+        hi = float(np.max(image))
+        # Scale the image, making the lowest value 0 and the highest value 255.
+        def scale_values(im):
+            scale = 255.0 / (hi - lo)
+            offset = -lo * scale
+            im = im.astype(np.float32) * scale + offset
+            img = np.clip(im, a_min=0, a_max=255.0)
+            return im.astype(np.uint8)
+        result = scale_values(image) if hi > lo else image
+        return result
+    # Assumes RGB for now.    Scales each channel independently
+    # and then stacks the result.
+    s1 = scale_channel(image[:, :, 0])
+    s2 = scale_channel(image[:, :, 1])
+    s3 = scale_channel(image[:, :, 2])
+    image = np.stack([s1, s2, s3], 2)
+    return image
+def sharpness(image, factor):
+    """Implements Sharpness function from PIL."""
+    orig_image = image
+    image = image.astype(np.float32)
+    # Make image 4D for conv operation.
+    # SMOOTH PIL Kernel.
+    kernel = np.array([[1, 1, 1], [1, 5, 1], [1, 1, 1]], dtype=np.float32) / 13.
+    result = cv2.filter2D(image, -1, kernel).astype(np.uint8)
+    # Blend the final result.
+    return blend(result, orig_image, factor)
+def equalize(image):
+    """Implements Equalize function from PIL using."""
+    def scale_channel(im, c):
+        """Scale the data in the channel to implement equalize."""
+        im = im[:, :, c].astype(np.int32)
+        # Compute the histogram of the image channel.
+        histo, _ = np.histogram(im, range=[0, 255], bins=256)
+        # For the purposes of computing the step, filter out the nonzeros.
+        nonzero = np.where(np.not_equal(histo, 0))
+        nonzero_histo = np.reshape(np.take(histo, nonzero), [-1])
+        step = (np.sum(nonzero_histo) - nonzero_histo[-1]) // 255
+        def build_lut(histo, step):
+            # Compute the cumulative sum, shifting by step // 2
+            # and then normalization by step.
+            lut = (np.cumsum(histo) + (step // 2)) // step
+            # Shift lut, prepending with 0.
+            lut = np.concatenate([[0], lut[:-1]], 0)
+            # Clip the counts to be in range.    This is done
+            # in the C code for image.point.
+            return np.clip(lut, a_min=0, a_max=255).astype(np.uint8)
+        # If step is zero, return the original image.    Otherwise, build
+        # lut from the full histogram and step and then index from it.
+        if step == 0:
+            result = im
+        else:
+            result = np.take(build_lut(histo, step), im)
+        return result.astype(np.uint8)
+    # Assumes RGB for now.    Scales each channel independently
+    # and then stacks the result.
+    s1 = scale_channel(image, 0)
+    s2 = scale_channel(image, 1)
+    s3 = scale_channel(image, 2)
+    image = np.stack([s1, s2, s3], 2)
+    return image
+def wrap(image):
+    """Returns 'image' with an extra channel set to all 1s."""
+    shape = image.shape
+    extended_channel = 255 * np.ones([shape[0], shape[1], 1], image.dtype)
+    extended = np.concatenate([image, extended_channel], 2).astype(image.dtype)
+    return extended
+def unwrap(image, replace):
+    """Unwraps an image produced by wrap.
+    Where there is a 0 in the last channel for every spatial position,
+    the rest of the three channels in that spatial dimension are grayed
+    (set to 128).    Operations like translate and shear on a wrapped
+    Tensor will leave 0s in empty locations.    Some transformations look
+    at the intensity of values to do preprocessing, and we want these
+    empty pixels to assume the 'average' value, rather than pure black.
+    Args:
+        image: A 3D Image Tensor with 4 channels.
+        replace: A one or three value 1D tensor to fill empty pixels.
+    Returns:
+        image: A 3D image Tensor with 3 channels.
+    """
+    image_shape = image.shape
+    # Flatten the spatial dimensions.
+    flattened_image = np.reshape(image, [-1, image_shape[2]])
+    # Find all pixels where the last channel is zero.
+    alpha_channel = flattened_image[:, 3]
+    replace = np.concatenate([replace, np.ones([1], image.dtype)], 0)
+    # Where they are zero, fill them in with 'replace'.
+    alpha_channel = np.reshape(alpha_channel, (-1, 1))
+    alpha_channel = np.tile(alpha_channel, reps=(1, flattened_image.shape[1]))
+    flattened_image = np.where(
+        np.equal(alpha_channel, 0),
+        np.ones_like(
+            flattened_image, dtype=image.dtype) * replace,
+        flattened_image)
+    image = np.reshape(flattened_image, image_shape)
+    image = image[:, :, :3]
+    return image.astype(np.uint8)
+def _cutout_inside_bbox(image, bbox, pad_fraction):
+    """Generates cutout mask and the mean pixel value of the bbox.
+    First a location is randomly chosen within the image as the center where the
+    cutout mask will be applied. Note this can be towards the boundaries of the
+    image, so the full cutout mask may not be applied.
+    Args:
+        image: 3D uint8 Tensor.
+        bbox: 1D Tensor that has 4 elements (min_y, min_x, max_y, max_x)
+            of type float that represents the normalized coordinates between 0 and 1.
+        pad_fraction: Float that specifies how large the cutout mask should be in
+            in reference to the size of the original bbox. If pad_fraction is 0.25,
+            then the cutout mask will be of shape
+            (0.25 * bbox height, 0.25 * bbox width).
+    Returns:
+        A tuple. Fist element is a tensor of the same shape as image where each
+        element is either a 1 or 0 that is used to determine where the image
+        will have cutout applied. The second element is the mean of the pixels
+        in the image where the bbox is located.
+        mask value: [0,1]
+    """
+    image_height, image_width = image.shape[0], image.shape[1]
+    # Transform from shape [1, 4] to [4].
+    bbox = np.squeeze(bbox)
+    min_y = int(float(image_height) * bbox[0])
+    min_x = int(float(image_width) * bbox[1])
+    max_y = int(float(image_height) * bbox[2])
+    max_x = int(float(image_width) * bbox[3])
+    # Calculate the mean pixel values in the bounding box, which will be used
+    # to fill the cutout region.
+    mean = np.mean(image[min_y:max_y + 1, min_x:max_x + 1], axis=(0, 1))
+    # Cutout mask will be size pad_size_heigh * 2 by pad_size_width * 2 if the
+    # region lies entirely within the bbox.
+    box_height = max_y - min_y + 1
+    box_width = max_x - min_x + 1
+    pad_size_height = int(pad_fraction * (box_height / 2))
+    pad_size_width = int(pad_fraction * (box_width / 2))
+    # Sample the center location in the image where the zero mask will be applied.
+    cutout_center_height = np.random.randint(min_y, max_y + 1, dtype=np.int32)
+    cutout_center_width = np.random.randint(min_x, max_x + 1, dtype=np.int32)
+    lower_pad = np.maximum(0, cutout_center_height - pad_size_height)
+    upper_pad = np.maximum(
+        0, image_height - cutout_center_height - pad_size_height)
+    left_pad = np.maximum(0, cutout_center_width - pad_size_width)
+    right_pad = np.maximum(0,
+                           image_width - cutout_center_width - pad_size_width)
+    cutout_shape = [
+        image_height - (lower_pad + upper_pad),
+        image_width - (left_pad + right_pad)
+    ]
+    padding_dims = [[lower_pad, upper_pad], [left_pad, right_pad]]
+    mask = np.pad(np.zeros(
+        cutout_shape, dtype=image.dtype),
+                  padding_dims,
+                  'constant',
+                  constant_values=1)
+    mask = np.expand_dims(mask, 2)
+    mask = np.tile(mask, [1, 1, 3])
+    return mask, mean
+def bbox_cutout(image, bboxes, pad_fraction, replace_with_mean):
+    """Applies cutout to the image according to bbox information.
+    This is a cutout variant that using bbox information to make more informed
+    decisions on where to place the cutout mask.
+    Args:
+        image: 3D uint8 Tensor.
+        bboxes: 2D Tensor that is a list of the bboxes in the image. Each bbox
+            has 4 elements (min_y, min_x, max_y, max_x) of type float with values
+            between [0, 1].
+        pad_fraction: Float that specifies how large the cutout mask should be in
+            in reference to the size of the original bbox. If pad_fraction is 0.25,
+            then the cutout mask will be of shape
+            (0.25 * bbox height, 0.25 * bbox width).
+        replace_with_mean: Boolean that specified what value should be filled in
+            where the cutout mask is applied. Since the incoming image will be of
+            uint8 and will not have had any mean normalization applied, by default
+            we set the value to be 128. If replace_with_mean is True then we find
+            the mean pixel values across the channel dimension and use those to fill
+            in where the cutout mask is applied.
+    Returns:
+        A tuple. First element is a tensor of the same shape as image that has
+        cutout applied to it. Second element is the bboxes that were passed in
+        that will be unchanged.
+    """
+    def apply_bbox_cutout(image, bboxes, pad_fraction):
+        """Applies cutout to a single bounding box within image."""
+        # Choose a single bounding box to apply cutout to.
+        random_index = np.random.randint(0, bboxes.shape[0], dtype=np.int32)
+        # Select the corresponding bbox and apply cutout.
+        chosen_bbox = np.take(bboxes, random_index, axis=0)
+        mask, mean = _cutout_inside_bbox(image, chosen_bbox, pad_fraction)
+        # When applying cutout we either set the pixel value to 128 or to the mean
+        # value inside the bbox.
+        replace = mean if replace_with_mean else [128] * 3
+        # Apply the cutout mask to the image. Where the mask is 0 we fill it with
+        # `replace`.
+        image = np.where(
+            np.equal(mask, 0),
+            np.ones_like(
+                image, dtype=image.dtype) * replace,
+            image).astype(image.dtype)
+        return image
+    # Check to see if there are boxes, if so then apply boxcutout.
+    if len(bboxes) != 0:
+        image = apply_bbox_cutout(image, bboxes, pad_fraction)
+    return image, bboxes
+NAME_TO_FUNC = {
+        'AutoContrast': autocontrast,
+        'Equalize': equalize,
+        'Posterize': posterize,
+        'Solarize': solarize,
+        'SolarizeAdd': solarize_add,
+        'Color': color,
+        'Contrast': contrast,
+        'Brightness': brightness,
+        'Sharpness': sharpness,
+        'Cutout': cutout,
+        'BBox_Cutout': bbox_cutout,
+        'Rotate_BBox': rotate_with_bboxes,
+        # pylint:disable=g-long-lambda
+        'TranslateX_BBox': lambda image, bboxes, pixels, replace: translate_bbox(
+                image, bboxes, pixels, replace, shift_horizontal=True),
+        'TranslateY_BBox': lambda image, bboxes, pixels, replace: translate_bbox(
+                image, bboxes, pixels, replace, shift_horizontal=False),
+        'ShearX_BBox': lambda image, bboxes, level, replace: shear_with_bboxes(
+                image, bboxes, level, replace, shear_horizontal=True),
+        'ShearY_BBox': lambda image, bboxes, level, replace: shear_with_bboxes(
+                image, bboxes, level, replace, shear_horizontal=False),
+        # pylint:enable=g-long-lambda
+        'Rotate_Only_BBoxes': rotate_only_bboxes,
+        'ShearX_Only_BBoxes': shear_x_only_bboxes,
+        'ShearY_Only_BBoxes': shear_y_only_bboxes,
+        'TranslateX_Only_BBoxes': translate_x_only_bboxes,
+        'TranslateY_Only_BBoxes': translate_y_only_bboxes,
+        'Flip_Only_BBoxes': flip_only_bboxes,
+        'Solarize_Only_BBoxes': solarize_only_bboxes,
+        'Equalize_Only_BBoxes': equalize_only_bboxes,
+        'Cutout_Only_BBoxes': cutout_only_bboxes,
+}
+def _randomly_negate_tensor(tensor):
+    """With 50% prob turn the tensor negative."""
+    should_flip = np.floor(np.random.rand() + 0.5) >= 1
+    final_tensor = tensor if should_flip else -tensor
+    return final_tensor
+def _rotate_level_to_arg(level):
+    level = (level / _MAX_LEVEL) * 30.
+    level = _randomly_negate_tensor(level)
+    return (level, )
+def _shrink_level_to_arg(level):
+    """Converts level to ratio by which we shrink the image content."""
+    if level == 0:
+        return (1.0, )  # if level is zero, do not shrink the image
+    # Maximum shrinking ratio is 2.9.
+    level = 2. / (_MAX_LEVEL / level) + 0.9
+    return (level, )
+def _enhance_level_to_arg(level):
+    return ((level / _MAX_LEVEL) * 1.8 + 0.1, )
+def _shear_level_to_arg(level):
+    level = (level / _MAX_LEVEL) * 0.3
+    # Flip level to negative with 50% chance.
+    level = _randomly_negate_tensor(level)
+    return (level, )
+def _translate_level_to_arg(level, translate_const):
+    level = (level / _MAX_LEVEL) * float(translate_const)
+    # Flip level to negative with 50% chance.
+    level = _randomly_negate_tensor(level)
+    return (level, )
+def _bbox_cutout_level_to_arg(level, hparams):
+    cutout_pad_fraction = (level /
+                           _MAX_LEVEL) * 0.75  # hparams.cutout_max_pad_fraction
+    return (cutout_pad_fraction, False)  # hparams.cutout_bbox_replace_with_mean
+def level_to_arg(hparams):
+    return {
+        'AutoContrast': lambda level: (),
+        'Equalize': lambda level: (),
+        'Posterize': lambda level: (int((level / _MAX_LEVEL) * 4), ),
+        'Solarize': lambda level: (int((level / _MAX_LEVEL) * 256), ),
+        'SolarizeAdd': lambda level: (int((level / _MAX_LEVEL) * 110), ),
+        'Color': _enhance_level_to_arg,
+        'Contrast': _enhance_level_to_arg,
+        'Brightness': _enhance_level_to_arg,
+        'Sharpness': _enhance_level_to_arg,
+        'Cutout':
+        lambda level: (int((level / _MAX_LEVEL) * 100), ),  # hparams.cutout_const=100
+        # pylint:disable=g-long-lambda
+        'BBox_Cutout': lambda level: _bbox_cutout_level_to_arg(level, hparams),
+        'TranslateX_BBox':
+        lambda level: _translate_level_to_arg(level, 250),  # hparams.translate_const=250
+        'TranslateY_BBox':
+        lambda level: _translate_level_to_arg(level, 250),  # hparams.translate_cons
+        # pylint:enable=g-long-lambda
+        'ShearX_BBox': _shear_level_to_arg,
+        'ShearY_BBox': _shear_level_to_arg,
+        'Rotate_BBox': _rotate_level_to_arg,
+        'Rotate_Only_BBoxes': _rotate_level_to_arg,
+        'ShearX_Only_BBoxes': _shear_level_to_arg,
+        'ShearY_Only_BBoxes': _shear_level_to_arg,
+        # pylint:disable=g-long-lambda
+        'TranslateX_Only_BBoxes':
+        lambda level: _translate_level_to_arg(level, 120),  # hparams.translate_bbox_const
+        'TranslateY_Only_BBoxes':
+        lambda level: _translate_level_to_arg(level, 120),  # hparams.translate_bbox_const
+        # pylint:enable=g-long-lambda
+        'Flip_Only_BBoxes': lambda level: (),
+        'Solarize_Only_BBoxes':
+        lambda level: (int((level / _MAX_LEVEL) * 256), ),
+        'Equalize_Only_BBoxes': lambda level: (),
+        # pylint:disable=g-long-lambda
+        'Cutout_Only_BBoxes':
+        lambda level: (int((level / _MAX_LEVEL) * 50), ),  # hparams.cutout_bbox_const
+        # pylint:enable=g-long-lambda
+    }
+def bbox_wrapper(func):
+    """Adds a bboxes function argument to func and returns unchanged bboxes."""
+    def wrapper(images, bboxes, *args, **kwargs):
+        return (func(images, *args, **kwargs), bboxes)
+    return wrapper
+def _parse_policy_info(name, prob, level, replace_value, augmentation_hparams):
+    """Return the function that corresponds to `name` and update `level` param."""
+    func = NAME_TO_FUNC[name]
+    args = level_to_arg(augmentation_hparams)[name](level)
+    # Check to see if prob is passed into function. This is used for operations
+    # where we alter bboxes independently.
+    # pytype:disable=wrong-arg-types
+    if 'prob' in inspect.getargspec(func)[0]:
+        args = tuple([prob] + list(args))
+    # pytype:enable=wrong-arg-types
+    # Add in replace arg if it is required for the function that is being called.
+    if 'replace' in inspect.getargspec(func)[0]:
+        # Make sure replace is the final argument
+        assert 'replace' == inspect.getargspec(func)[0][-1]
+        args = tuple(list(args) + [replace_value])
+    # Add bboxes as the second positional argument for the function if it does
+    # not already exist.
+    if 'bboxes' not in inspect.getargspec(func)[0]:
+        func = bbox_wrapper(func)
+    return (func, prob, args)
+def _apply_func_with_prob(func, image, args, prob, bboxes):
+    """Apply `func` to image w/ `args` as input with probability `prob`."""
+    assert isinstance(args, tuple)
+    assert 'bboxes' == inspect.getargspec(func)[0][1]
+    # If prob is a function argument, then this randomness is being handled
+    # inside the function, so make sure it is always called.
+    if 'prob' in inspect.getargspec(func)[0]:
+        prob = 1.0
+    # Apply the function with probability `prob`.
+    should_apply_op = np.floor(np.random.rand() + 0.5) >= 1
+    if should_apply_op:
+        augmented_image, augmented_bboxes = func(image, bboxes, *args)
+    else:
+        augmented_image, augmented_bboxes = (image, bboxes)
+    return augmented_image, augmented_bboxes
+def select_and_apply_random_policy(policies, image, bboxes):
+    """Select a random policy from `policies` and apply it to `image`."""
+    policy_to_select = np.random.randint(0, len(policies), dtype=np.int32)
+    # policy_to_select = 6 # for test
+    for (i, policy) in enumerate(policies):
+        if i == policy_to_select:
+            image, bboxes = policy(image, bboxes)
+    return (image, bboxes)
+def build_and_apply_nas_policy(policies, image, bboxes, augmentation_hparams):
+    """Build a policy from the given policies passed in and apply to image.
+    Args:
+        policies: list of lists of tuples in the form `(func, prob, level)`, `func`
+            is a string name of the augmentation function, `prob` is the probability
+            of applying the `func` operation, `level` is the input argument for
+            `func`.
+        image: numpy array that the resulting policy will be applied to.
+        bboxes:
+        augmentation_hparams: Hparams associated with the NAS learned policy.
+    Returns:
+        A version of image that now has data augmentation applied to it based on
+        the `policies` pass into the function. Additionally, returns bboxes if
+        a value for them is passed in that is not None
+    """
+    replace_value = [128, 128, 128]
+    # func is the string name of the augmentation function, prob is the
+    # probability of applying the operation and level is the parameter associated
+    # tf_policies are functions that take in an image and return an augmented
+    # image.
+    tf_policies = []
+    for policy in policies:
+        tf_policy = []
+        # Link string name to the correct python function and make sure the correct
+        # argument is passed into that function.
+        for policy_info in policy:
+            policy_info = list(
+                policy_info) + [replace_value, augmentation_hparams]
+            tf_policy.append(_parse_policy_info(*policy_info))
+        # Now build the tf policy that will apply the augmentation procedue
+        # on image.
+        def make_final_policy(tf_policy_):
+            def final_policy(image_, bboxes_):
+                for func, prob, args in tf_policy_:
+                    image_, bboxes_ = _apply_func_with_prob(func, image_, args,
+                                                            prob, bboxes_)
+                return image_, bboxes_
+            return final_policy
+        tf_policies.append(make_final_policy(tf_policy))
+    augmented_images, augmented_bboxes = select_and_apply_random_policy(
+        tf_policies, image, bboxes)
+    # If no bounding boxes were specified, then just return the images.
+    return (augmented_images, augmented_bboxes)
+# TODO(barretzoph): Add in ArXiv link once paper is out.
+def distort_image_with_autoaugment(image, bboxes, augmentation_name):
+    """Applies the AutoAugment policy to `image` and `bboxes`.
+    Args:
+        image: `Tensor` of shape [height, width, 3] representing an image.
+        bboxes: `Tensor` of shape [N, 4] representing ground truth boxes that are
+            normalized between [0, 1].
+        augmentation_name: The name of the AutoAugment policy to use. The available
+            options are `v0`, `v1`, `v2`, `v3` and `test`. `v0` is the policy used for
+            all of the results in the paper and was found to achieve the best results
+            on the COCO dataset. `v1`, `v2` and `v3` are additional good policies
+            found on the COCO dataset that have slight variation in what operations
+            were used during the search procedure along with how many operations are
+            applied in parallel to a single image (2 vs 3).
+    Returns:
+        A tuple containing the augmented versions of `image` and `bboxes`.
+    """
+    available_policies = {
+        'v0': policy_v0,
+        'v1': policy_v1,
+        'v2': policy_v2,
+        'v3': policy_v3,
+        'test': policy_vtest
+    }
+    if augmentation_name not in available_policies:
+        raise ValueError('Invalid augmentation_name: {}'.format(
+            augmentation_name))
+    policy = available_policies[augmentation_name]()
+    augmentation_hparams = {}
+    return build_and_apply_nas_policy(policy, image, bboxes,
+                                      augmentation_hparams)
--- a/ppdet/data/transform/operators.py
+++ b/ppdet/data/transform/operators.py
@@ -435,6 +435,75 @@ class RandomFlipImage(BaseOperator):
        return sample
+@register_op
+class AutoAugmentImage(BaseOperator):
+    def __init__(self, is_normalized=False, autoaug_type="v1"):
+        """
+        Args:
+            is_normalized (bool): whether the bbox scale to [0,1]
+            autoaug_type (str): autoaug type, support v0, v1, v2, v3, test
+        """
+        super(AutoAugmentImage, self).__init__()
+        self.is_normalized = is_normalized
+        self.autoaug_type = autoaug_type
+        if not isinstance(self.is_normalized, bool):
+            raise TypeError("{}: input type is invalid.".format(self))
+    def __call__(self, sample, context=None):
+        """
+        Learning Data Augmentation Strategies for Object Detection, see https://arxiv.org/abs/1906.11172
+        """
+        samples = sample
+        batch_input = True
+        if not isinstance(samples, Sequence):
+            batch_input = False
+            samples = [samples]
+        for sample in samples:
+            gt_bbox = sample['gt_bbox']
+            im = sample['image']
+            if not isinstance(im, np.ndarray):
+                raise TypeError("{}: image is not a numpy array.".format(self))
+            if len(im.shape) != 3:
+                raise ImageError("{}: image is not 3-dimensional.".format(self))
+            if len(gt_bbox) == 0:
+                continue
+            # gt_boxes : [x1, y1, x2, y2]
+            # norm_gt_boxes: [y1, x1, y2, x2]
+            height, width, _ = im.shape
+            norm_gt_bbox = np.ones_like(gt_bbox, dtype=np.float32)
+            if not self.is_normalized:
+                norm_gt_bbox[:, 0] = gt_bbox[:, 1] / float(height)
+                norm_gt_bbox[:, 1] = gt_bbox[:, 0] / float(width)
+                norm_gt_bbox[:, 2] = gt_bbox[:, 3] / float(height)
+                norm_gt_bbox[:, 3] = gt_bbox[:, 2] / float(width)
+            else:
+                norm_gt_bbox[:, 0] = gt_bbox[:, 1]
+                norm_gt_bbox[:, 1] = gt_bbox[:, 0]
+                norm_gt_bbox[:, 2] = gt_bbox[:, 3]
+                norm_gt_bbox[:, 3] = gt_bbox[:, 2]
+            from .autoaugment_utils import distort_image_with_autoaugment
+            im, norm_gt_bbox = distort_image_with_autoaugment(im, norm_gt_bbox,
+                                                              self.autoaug_type)
+            if not self.is_normalized:
+                gt_bbox[:, 0] = norm_gt_bbox[:, 1] * float(width)
+                gt_bbox[:, 1] = norm_gt_bbox[:, 0] * float(height)
+                gt_bbox[:, 2] = norm_gt_bbox[:, 3] * float(width)
+                gt_bbox[:, 3] = norm_gt_bbox[:, 2] * float(height)
+            else:
+                gt_bbox[:, 0] = norm_gt_bbox[:, 1]
+                gt_bbox[:, 1] = norm_gt_bbox[:, 0]
+                gt_bbox[:, 2] = norm_gt_bbox[:, 3]
+                gt_bbox[:, 3] = norm_gt_bbox[:, 2]
+            sample['gt_bbox'] = gt_bbox
+            sample['image'] = im
+        sample = samples if batch_input else samples[0]
+        return sample
 @register_op
 class NormalizeImage(BaseOperator):
    def __init__(self,