diff --git a/official/modeling/training/distributed_executor.py b/official/modeling/training/distributed_executor.py
index df463a23711d873f7cc4e0e2ab85e042e9614f94..10b13c9061e315c1a343d877b6cd6a86e92a3f83 100644
--- a/official/modeling/training/distributed_executor.py
+++ b/official/modeling/training/distributed_executor.py
@@ -248,9 +248,10 @@ class DistributedExecutor(object):
           _replicated_step, args=(next(iterator),))
       # For reporting, we returns the mean of losses.
-      loss = strategy.reduce(
-          tf.distribute.ReduceOp.MEAN, per_replica_losses, axis=None)
-      return loss
+      losses = tf.nest.map_structure(
+          lambda x: strategy.reduce(tf.distribute.ReduceOp.MEAN, x, axis=None),
+          per_replica_losses)
+      return losses
 
     return train_step
 
diff --git a/official/vision/detection/configs/maskrcnn_config.py b/official/vision/detection/configs/maskrcnn_config.py
index cf8ae404e860bee050f7dec94c52c615fc14482f..e84b24b474fdf8a4ca72dc50a0cf1dcc1c3b07fd 100644
--- a/official/vision/detection/configs/maskrcnn_config.py
+++ b/official/vision/detection/configs/maskrcnn_config.py
@@ -71,6 +71,9 @@ MASKRCNN_CFG.override({
         'min_level': 2,
         'max_level': 6,
         'anchors_per_location': 3,
+        'num_convs': 2,
+        'num_filters': 256,
+        'use_separable_conv': False,
         'use_batch_norm': False,
         'batch_norm': {
             'batch_norm_momentum': 0.997,
@@ -83,7 +86,11 @@
         # Note that `num_classes` is the total number of classes including
         # one background classes whose index is 0.
         'num_classes': 91,
-        'fast_rcnn_mlp_head_dim': 1024,
+        'num_convs': 0,
+        'num_filters': 256,
+        'use_separable_conv': False,
+        'num_fcs': 2,
+        'fc_dims': 1024,
         'use_batch_norm': False,
         'batch_norm': {
             'batch_norm_momentum': 0.997,
@@ -95,6 +102,9 @@
     'mrcnn_head': {
         'num_classes': 91,
         'mask_target_size': 28,
+        'num_convs': 4,
+        'num_filters': 256,
+        'use_separable_conv': False,
         'use_batch_norm': False,
         'batch_norm': {
             'batch_norm_momentum': 0.997,
diff --git a/official/vision/detection/dataloader/maskrcnn_parser.py b/official/vision/detection/dataloader/maskrcnn_parser.py
index 97de2ba19e5e614ab26a4eb3b151a86a0aaf9374..0f446190bf5db2cb517b6f18ef58974d3e043075 100644
--- a/official/vision/detection/dataloader/maskrcnn_parser.py
+++ b/official/vision/detection/dataloader/maskrcnn_parser.py
@@ -353,7 +353,9 @@ class Parser(object):
                                  self._anchor_size,
                                  (image_height, image_width))
 
-    labels = {}
+    labels = {
+        'image_info': image_info,
+    }
     if self._mode == ModeKeys.PREDICT_WITH_GT:
       # Converts boxes from normalized coordinates to pixel coordinates.
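The distributed_executor.py change above generalizes the reduction from a single scalar to a *structure* of per-replica losses, since the detection `_replicated_step` now returns a dict of loss components (see detection_executor.py below). A minimal sketch of the resulting behavior, outside this codebase; the loss names and the `MirroredStrategy` setup are illustrative only, and older TF 2.0/2.1 releases spell `strategy.run` as `strategy.experimental_run_v2`:

    import tensorflow as tf

    strategy = tf.distribute.MirroredStrategy()

    def replicated_step():
      # Stand-in for the per-replica step; returns a structure of losses.
      return {'total_loss': tf.constant(1.0), 'rpn_loss': tf.constant(0.25)}

    per_replica_losses = strategy.run(replicated_step)
    # Reduce every leaf of the structure, mirroring the patched code path.
    losses = tf.nest.map_structure(
        lambda x: strategy.reduce(tf.distribute.ReduceOp.MEAN, x, axis=None),
        per_replica_losses)
    # `losses` is a plain dict of scalar tensors, one mean per loss component.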
diff --git a/official/vision/detection/executor/detection_executor.py b/official/vision/detection/executor/detection_executor.py
index aef17f41c9b159764edb529d1884d5704f58f0d8..61b4058c1629b3f99a45d2a1ab8d9a783bd43bb6 100644
--- a/official/vision/detection/executor/detection_executor.py
+++ b/official/vision/detection/executor/detection_executor.py
@@ -82,7 +82,7 @@ class DetectionDistributedExecutor(executor.DistributedExecutor):
         grads = tape.gradient(loss, trainable_variables)
         optimizer.apply_gradients(zip(grads, trainable_variables))
-      return loss
+      return losses
 
     return _replicated_step
 
diff --git a/official/vision/detection/modeling/architecture/factory.py b/official/vision/detection/modeling/architecture/factory.py
index 0eeb9872f58996ca0ef46ec569f282ad586a7836..bc8c2cca3ac0fdb5df7c312cbab46e29c89f8870 100644
--- a/official/vision/detection/modeling/architecture/factory.py
+++ b/official/vision/detection/modeling/architecture/factory.py
@@ -94,6 +94,10 @@ def rpn_head_generator(params):
   return heads.RpnHead(params.min_level,
                        params.max_level,
                        params.anchors_per_location,
+                       params.num_convs,
+                       params.num_filters,
+                       params.use_separable_conv,
+                       params.use_batch_norm,
                        batch_norm_relu=batch_norm_relu_generator(
                            params.batch_norm))
 
@@ -101,7 +105,12 @@ def rpn_head_generator(params):
 def fast_rcnn_head_generator(params):
   """Generator function for Fast R-CNN head architecture."""
   return heads.FastrcnnHead(params.num_classes,
-                            params.fast_rcnn_mlp_head_dim,
+                            params.num_convs,
+                            params.num_filters,
+                            params.use_separable_conv,
+                            params.num_fcs,
+                            params.fc_dims,
+                            params.use_batch_norm,
                             batch_norm_relu=batch_norm_relu_generator(
                                 params.batch_norm))
 
@@ -110,6 +119,10 @@ def mask_rcnn_head_generator(params):
   """Generator function for Mask R-CNN head architecture."""
   return heads.MaskrcnnHead(params.num_classes,
                             params.mask_target_size,
+                            params.num_convs,
+                            params.num_filters,
+                            params.use_separable_conv,
+                            params.use_batch_norm,
                             batch_norm_relu=batch_norm_relu_generator(
                                 params.batch_norm))
 
diff --git a/official/vision/detection/modeling/architecture/fpn.py b/official/vision/detection/modeling/architecture/fpn.py
index 488a9f06d29d2f85a6c1456c49d5176ff0420270..d74f54133feb0b9c1a633fe4061921ddf48297d9 100644
--- a/official/vision/detection/modeling/architecture/fpn.py
+++ b/official/vision/detection/modeling/architecture/fpn.py
@@ -24,6 +24,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import functools
+
 import tensorflow.compat.v2 as tf
 from tensorflow.python.keras import backend
 
@@ -39,6 +41,7 @@ class Fpn(object):
                max_level=7,
                fpn_feat_dims=256,
                use_separable_conv=False,
+               use_batch_norm=True,
                batch_norm_relu=nn_ops.BatchNormRelu):
     """FPN initialization function.
 
@@ -48,12 +51,19 @@
       fpn_feat_dims: `int` number of filters in FPN layers.
       use_separable_conv: `bool`, if True use separable convolution for
         convolution in FPN layers.
+      use_batch_norm: `bool`, indicating whether batchnorm layers are added.
       batch_norm_relu: an operation that includes a batch normalization layer
         followed by a relu layer(optional).
""" self._min_level = min_level self._max_level = max_level self._fpn_feat_dims = fpn_feat_dims + if use_separable_conv: + self._conv2d_op = functools.partial( + tf.keras.layers.SeparableConv2D, depth_multiplier=1) + else: + self._conv2d_op = tf.keras.layers.Conv2D + self._use_batch_norm = use_batch_norm self._batch_norm_relu = batch_norm_relu self._batch_norm_relus = {} @@ -61,47 +71,26 @@ class Fpn(object): self._post_hoc_conv2d_op = {} self._coarse_conv2d_op = {} for level in range(self._min_level, self._max_level + 1): - self._batch_norm_relus[level] = batch_norm_relu( - relu=False, name='p%d-bn' % level) - if use_separable_conv: - self._lateral_conv2d_op[level] = tf.keras.layers.SeparableConv2D( - filters=self._fpn_feat_dims, - kernel_size=(1, 1), - padding='same', - depth_multiplier=1, - name='l%d' % level) - self._post_hoc_conv2d_op[level] = tf.keras.layers.SeparableConv2D( - filters=self._fpn_feat_dims, - strides=(1, 1), - kernel_size=(3, 3), - padding='same', - depth_multiplier=1, - name='post_hoc_d%d' % level) - self._coarse_conv2d_op[level] = tf.keras.layers.SeparableConv2D( - filters=self._fpn_feat_dims, - strides=(2, 2), - kernel_size=(3, 3), - padding='same', - depth_multiplier=1, - name='p%d' % level) - else: - self._lateral_conv2d_op[level] = tf.keras.layers.Conv2D( - filters=self._fpn_feat_dims, - kernel_size=(1, 1), - padding='same', - name='l%d' % level) - self._post_hoc_conv2d_op[level] = tf.keras.layers.Conv2D( - filters=self._fpn_feat_dims, - strides=(1, 1), - kernel_size=(3, 3), - padding='same', - name='post_hoc_d%d' % level) - self._coarse_conv2d_op[level] = tf.keras.layers.Conv2D( - filters=self._fpn_feat_dims, - strides=(2, 2), - kernel_size=(3, 3), - padding='same', - name='p%d' % level) + if self._use_batch_norm: + self._batch_norm_relus[level] = batch_norm_relu( + relu=False, name='p%d-bn' % level) + self._lateral_conv2d_op[level] = self._conv2d_op( + filters=self._fpn_feat_dims, + kernel_size=(1, 1), + padding='same', + name='l%d' % level) + self._post_hoc_conv2d_op[level] = self._conv2d_op( + filters=self._fpn_feat_dims, + strides=(1, 1), + kernel_size=(3, 3), + padding='same', + name='post_hoc_d%d' % level) + self._coarse_conv2d_op[level] = self._conv2d_op( + filters=self._fpn_feat_dims, + strides=(2, 2), + kernel_size=(3, 3), + padding='same', + name='p%d' % level) def __call__(self, multilevel_features, is_training=None): """Returns the FPN features for a given multilevel features. @@ -117,7 +106,7 @@ class Fpn(object): [min_level, min_level + 1, ..., max_level]. The values are corresponding FPN features with shape [batch_size, height_l, width_l, fpn_feat_dims]. """ - input_levels = multilevel_features.keys() + input_levels = list(multilevel_features.keys()) if min(input_levels) > self._min_level: raise ValueError( 'The minimum backbone level %d should be '%(min(input_levels)) + @@ -146,8 +135,9 @@ class Fpn(object): if level > backbone_max_level + 1: feats_in = tf.nn.relu(feats_in) feats[level] = self._coarse_conv2d_op[level](feats_in) - # Adds batch_norm layer. - for level in range(self._min_level, self._max_level + 1): - feats[level] = self._batch_norm_relus[level]( - feats[level], is_training=is_training) + if self._use_batch_norm: + # Adds batch_norm layer. 
diff --git a/official/vision/detection/modeling/architecture/heads.py b/official/vision/detection/modeling/architecture/heads.py
index 2aa047975b58e833819fe520a1acac6eacbe9d62..591d6e9964b905b1c47066795c01274a0f66dd4d 100644
--- a/official/vision/detection/modeling/architecture/heads.py
+++ b/official/vision/detection/modeling/architecture/heads.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import functools
 import pickle
 
 from absl import logging
@@ -35,6 +36,10 @@ class RpnHead(object):
                min_level,
                max_level,
                anchors_per_location,
+               num_convs=2,
+               num_filters=256,
+               use_separable_conv=False,
+               use_batch_norm=True,
                batch_norm_relu=nn_ops.BatchNormRelu):
     """Initialize params to build Region Proposal Network head.
 
@@ -43,48 +48,67 @@ class RpnHead(object):
       max_level: `int` number of maximum feature level.
       anchors_per_location: `int` number of number of anchors per pixel
        location.
+      num_convs: `int` number that represents the number of the intermediate
+        conv layers before the prediction.
+      num_filters: `int` number that represents the number of filters of the
+        intermediate conv layers.
+      use_separable_conv: `bool`, indicating whether separable conv layers
+        are used.
+      use_batch_norm: `bool`, indicating whether batchnorm layers are added.
       batch_norm_relu: an operation that includes a batch normalization layer
         followed by a relu layer(optional).
     """
     self._min_level = min_level
     self._max_level = max_level
     self._anchors_per_location = anchors_per_location
-    self._rpn_conv = tf.keras.layers.Conv2D(
-        256,
+    self._use_batch_norm = use_batch_norm
+
+    if use_separable_conv:
+      self._conv2d_op = functools.partial(
+          tf.keras.layers.SeparableConv2D,
+          depth_multiplier=1,
+          bias_initializer=tf.zeros_initializer())
+    else:
+      self._conv2d_op = functools.partial(
+          tf.keras.layers.Conv2D,
+          kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
+          bias_initializer=tf.zeros_initializer())
+
+    self._rpn_conv = self._conv2d_op(
+        num_filters,
         kernel_size=(3, 3),
         strides=(1, 1),
-        activation=None,
-        bias_initializer=tf.zeros_initializer(),
-        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
+        activation=(None if self._use_batch_norm else tf.nn.relu),
         padding='same',
         name='rpn')
-    self._rpn_class_conv = tf.keras.layers.Conv2D(
+    self._rpn_class_conv = self._conv2d_op(
         anchors_per_location,
         kernel_size=(1, 1),
         strides=(1, 1),
-        bias_initializer=tf.zeros_initializer(),
-        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
         padding='valid',
         name='rpn-class')
-    self._rpn_box_conv = tf.keras.layers.Conv2D(
+    self._rpn_box_conv = self._conv2d_op(
         4 * anchors_per_location,
         kernel_size=(1, 1),
         strides=(1, 1),
-        bias_initializer=tf.zeros_initializer(),
-        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
         padding='valid',
         name='rpn-box')
+
     self._batch_norm_relus = {}
     for level in range(self._min_level, self._max_level + 1):
-      self._batch_norm_relus[level] = batch_norm_relu(name='rpn%d-bn' % level)
+      if self._use_batch_norm:
+        self._batch_norm_relus[level] = batch_norm_relu(name='rpn-l%d-bn' %
+                                                        level)
 
   def _shared_rpn_heads(self, features, anchors_per_location, level,
                         is_training):
     """Shared RPN heads."""
     # TODO(chiachenc): check the channel depth of the first convoultion.
     features = self._rpn_conv(features)
-    # The batch normalization layers are not shared between levels.
-    features = self._batch_norm_relus[level](features, is_training=is_training)
+    if self._use_batch_norm:
+      # The batch normalization layers are not shared between levels.
+      features = self._batch_norm_relus[level](
+          features, is_training=is_training)
 
     # Proposal classification scores
     scores = self._rpn_class_conv(features)
     # Proposal bbox regression deltas
@@ -111,19 +135,51 @@ class FastrcnnHead(object):
 
   def __init__(self,
                num_classes,
-               mlp_head_dim,
+               num_convs=0,
+               num_filters=256,
+               use_separable_conv=False,
+               num_fcs=2,
+               fc_dims=1024,
+               use_batch_norm=True,
                batch_norm_relu=nn_ops.BatchNormRelu):
     """Initialize params to build Fast R-CNN box head.
 
     Args:
       num_classes: a integer for the number of classes.
-      mlp_head_dim: a integer that is the hidden dimension in the
-        fully-connected layers.
+      num_convs: `int` number that represents the number of the intermediate
+        conv layers before the FC layers.
+      num_filters: `int` number that represents the number of filters of the
+        intermediate conv layers.
+      use_separable_conv: `bool`, indicating whether separable conv layers
+        are used.
+      num_fcs: `int` number that represents the number of FC layers before
+        the predictions.
+      fc_dims: `int` number that represents the dimension of the FC layers.
+      use_batch_norm: `bool`, indicating whether batchnorm layers are added.
       batch_norm_relu: an operation that includes a batch normalization layer
         followed by a relu layer(optional).
     """
     self._num_classes = num_classes
-    self._mlp_head_dim = mlp_head_dim
+
+    self._num_convs = num_convs
+    self._num_filters = num_filters
+    if use_separable_conv:
+      self._conv2d_op = functools.partial(
+          tf.keras.layers.SeparableConv2D,
+          depth_multiplier=1,
+          bias_initializer=tf.zeros_initializer())
+    else:
+      self._conv2d_op = functools.partial(
+          tf.keras.layers.Conv2D,
+          kernel_initializer=tf.keras.initializers.VarianceScaling(
+              scale=2, mode='fan_out', distribution='untruncated_normal'),
+          bias_initializer=tf.zeros_initializer())
+
+    self._num_fcs = num_fcs
+    self._fc_dims = fc_dims
+
+    self._use_batch_norm = use_batch_norm
     self._batch_norm_relu = batch_norm_relu
 
   def __call__(self, roi_features, is_training=None):
@@ -145,17 +201,33 @@ class FastrcnnHead(object):
     with backend.get_graph().as_default(), tf.name_scope('fast_rcnn_head'):
       # reshape inputs beofre FC.
       _, num_rois, height, width, filters = roi_features.get_shape().as_list()
-      roi_features = tf.reshape(roi_features,
-                                [-1, num_rois, height * width * filters])
-      net = tf.keras.layers.Dense(
-          units=self._mlp_head_dim, activation=None, name='fc6')(
-              roi_features)
-
-      net = self._batch_norm_relu(fused=False)(net, is_training=is_training)
-      net = tf.keras.layers.Dense(
-          units=self._mlp_head_dim, activation=None, name='fc7')(
-              net)
-      net = self._batch_norm_relu(fused=False)(net, is_training=is_training)
+
+      net = tf.reshape(roi_features, [-1, height, width, filters])
+      for i in range(self._num_convs):
+        net = self._conv2d_op(
+            self._num_filters,
+            kernel_size=(3, 3),
+            strides=(1, 1),
+            padding='same',
+            dilation_rate=(1, 1),
+            activation=(None if self._use_batch_norm else tf.nn.relu),
+            name='conv_{}'.format(i))(net)
+        if self._use_batch_norm:
+          net = self._batch_norm_relu()(net, is_training=is_training)
+
+      filters = self._num_filters if self._num_convs > 0 else filters
+      net = tf.reshape(net, [-1, num_rois, height * width * filters])
+
+      if self._use_batch_norm:
+        net = self._batch_norm_relu(fused=False)(net, is_training=is_training)
+      for i in range(self._num_fcs):
+        net = tf.keras.layers.Dense(
+            units=self._fc_dims,
+            activation=(None if self._use_batch_norm else tf.nn.relu),
+            name='fc{}'.format(i+6))(
+                net)
+        if self._use_batch_norm:
+          net = self._batch_norm_relu(fused=False)(net, is_training=is_training)
 
       class_outputs = tf.keras.layers.Dense(
           self._num_classes,
@@ -178,17 +250,44 @@ class MaskrcnnHead(object):
 
   def __init__(self,
                num_classes,
                mask_target_size,
+               num_convs=4,
+               num_filters=256,
+               use_separable_conv=False,
+               use_batch_norm=True,
                batch_norm_relu=nn_ops.BatchNormRelu):
     """Initialize params to build Fast R-CNN head.
 
     Args:
       num_classes: a integer for the number of classes.
       mask_target_size: a integer that is the resolution of masks.
+      num_convs: `int` number that represents the number of the intermediate
+        conv layers before the prediction.
+      num_filters: `int` number that represents the number of filters of the
+        intermediate conv layers.
+      use_separable_conv: `bool`, indicating whether separable conv layers
+        are used.
+      use_batch_norm: `bool`, indicating whether batchnorm layers are added.
       batch_norm_relu: an operation that includes a batch normalization layer
         followed by a relu layer(optional).
     """
     self._num_classes = num_classes
     self._mask_target_size = mask_target_size
+
+    self._num_convs = num_convs
+    self._num_filters = num_filters
+    if use_separable_conv:
+      self._conv2d_op = functools.partial(
+          tf.keras.layers.SeparableConv2D,
+          depth_multiplier=1,
+          bias_initializer=tf.zeros_initializer())
+    else:
+      self._conv2d_op = functools.partial(
+          tf.keras.layers.Conv2D,
+          kernel_initializer=tf.keras.initializers.VarianceScaling(
+              scale=2, mode='fan_out', distribution='untruncated_normal'),
+          bias_initializer=tf.zeros_initializer())
+
+    self._use_batch_norm = use_batch_norm
     self._batch_norm_relu = batch_norm_relu
 
   def __call__(self, roi_features, class_indices, is_training=None):
@@ -200,6 +299,7 @@ class MaskrcnnHead(object):
       class_indices: a Tensor of shape [batch_size, num_rois], indicating
         which class the ROI is.
       is_training: `boolean`, if True if model is in training mode.
+
     Returns:
       mask_outputs: a tensor with a shape of
         [batch_size, num_masks, mask_height, mask_width, num_classes],
@@ -211,64 +311,43 @@ class MaskrcnnHead(object):
         boxes is not 4.
""" - def _get_stddev_equivalent_to_msra_fill(kernel_size, fan_out): - """Returns the stddev of random normal initialization as MSRAFill.""" - # Reference: https://github.com/pytorch/pytorch/blob/master/caffe2/operators/filler_op.h#L445-L463 # pylint: disable=line-too-long - # For example, kernel size is (3, 3) and fan out is 256, stddev is 0.029. - # stddev = (2/(3*3*256))^0.5 = 0.029 - return (2 / (kernel_size[0] * kernel_size[1] * fan_out)) ** 0.5 - with backend.get_graph().as_default(): with tf.name_scope('mask_head'): _, num_rois, height, width, filters = roi_features.get_shape().as_list() net = tf.reshape(roi_features, [-1, height, width, filters]) - for i in range(4): - kernel_size = (3, 3) - fan_out = 256 - init_stddev = _get_stddev_equivalent_to_msra_fill( - kernel_size, fan_out) - net = tf.keras.layers.Conv2D( - fan_out, - kernel_size=kernel_size, + for i in range(self._num_convs): + net = self._conv2d_op( + self._num_filters, + kernel_size=(3, 3), strides=(1, 1), padding='same', dilation_rate=(1, 1), - activation=None, - kernel_initializer=tf.keras.initializers.RandomNormal( - stddev=init_stddev), - bias_initializer=tf.zeros_initializer(), + activation=(None if self._use_batch_norm else tf.nn.relu), name='mask-conv-l%d' % i)( net) - net = self._batch_norm_relu()(net, is_training=is_training) + if self._use_batch_norm: + net = self._batch_norm_relu()(net, is_training=is_training) - kernel_size = (2, 2) - fan_out = 256 - init_stddev = _get_stddev_equivalent_to_msra_fill(kernel_size, fan_out) net = tf.keras.layers.Conv2DTranspose( - fan_out, - kernel_size=kernel_size, + self._num_filters, + kernel_size=(2, 2), strides=(2, 2), padding='valid', - activation=None, - kernel_initializer=tf.keras.initializers.RandomNormal( - stddev=init_stddev), + activation=(None if self._use_batch_norm else tf.nn.relu), + kernel_initializer=tf.keras.initializers.VarianceScaling( + scale=2, mode='fan_out', distribution='untruncated_normal'), bias_initializer=tf.zeros_initializer(), name='conv5-mask')( net) - net = self._batch_norm_relu()(net, is_training=is_training) - - kernel_size = (1, 1) - fan_out = self._num_classes - init_stddev = _get_stddev_equivalent_to_msra_fill(kernel_size, fan_out) - mask_outputs = tf.keras.layers.Conv2D( - fan_out, - kernel_size=kernel_size, + if self._use_batch_norm: + net = self._batch_norm_relu()(net, is_training=is_training) + + mask_outputs = self._conv2d_op( + self._num_classes, + kernel_size=(1, 1), strides=(1, 1), padding='valid', - kernel_initializer=tf.keras.initializers.RandomNormal( - stddev=init_stddev), - bias_initializer=tf.zeros_initializer(), name='mask_fcn_logits')( net) mask_outputs = tf.reshape(mask_outputs, [ diff --git a/official/vision/detection/modeling/architecture/resnet.py b/official/vision/detection/modeling/architecture/resnet.py index cd45907d1bd3dca7cd4d0c6e35e157bf5042d8a3..6654451988ef3f377723eec3cdeb2f278f509095 100644 --- a/official/vision/detection/modeling/architecture/resnet.py +++ b/official/vision/detection/modeling/architecture/resnet.py @@ -55,34 +55,13 @@ class Resnet(object): self._data_format = data_format model_params = { - 10: { - 'block': self.residual_block, - 'layers': [1, 1, 1, 1] - }, - 18: { - 'block': self.residual_block, - 'layers': [2, 2, 2, 2] - }, - 34: { - 'block': self.residual_block, - 'layers': [3, 4, 6, 3] - }, - 50: { - 'block': self.bottleneck_block, - 'layers': [3, 4, 6, 3] - }, - 101: { - 'block': self.bottleneck_block, - 'layers': [3, 4, 23, 3] - }, - 152: { - 'block': self.bottleneck_block, - 'layers': [3, 
diff --git a/official/vision/detection/modeling/architecture/resnet.py b/official/vision/detection/modeling/architecture/resnet.py
index cd45907d1bd3dca7cd4d0c6e35e157bf5042d8a3..6654451988ef3f377723eec3cdeb2f278f509095 100644
--- a/official/vision/detection/modeling/architecture/resnet.py
+++ b/official/vision/detection/modeling/architecture/resnet.py
@@ -55,34 +55,13 @@ class Resnet(object):
     self._data_format = data_format
 
     model_params = {
-        10: {
-            'block': self.residual_block,
-            'layers': [1, 1, 1, 1]
-        },
-        18: {
-            'block': self.residual_block,
-            'layers': [2, 2, 2, 2]
-        },
-        34: {
-            'block': self.residual_block,
-            'layers': [3, 4, 6, 3]
-        },
-        50: {
-            'block': self.bottleneck_block,
-            'layers': [3, 4, 6, 3]
-        },
-        101: {
-            'block': self.bottleneck_block,
-            'layers': [3, 4, 23, 3]
-        },
-        152: {
-            'block': self.bottleneck_block,
-            'layers': [3, 8, 36, 3]
-        },
-        200: {
-            'block': self.bottleneck_block,
-            'layers': [3, 24, 36, 3]
-        }
+        10: {'block': self.residual_block, 'layers': [1, 1, 1, 1]},
+        18: {'block': self.residual_block, 'layers': [2, 2, 2, 2]},
+        34: {'block': self.residual_block, 'layers': [3, 4, 6, 3]},
+        50: {'block': self.bottleneck_block, 'layers': [3, 4, 6, 3]},
+        101: {'block': self.bottleneck_block, 'layers': [3, 4, 23, 3]},
+        152: {'block': self.bottleneck_block, 'layers': [3, 8, 36, 3]},
+        200: {'block': self.bottleneck_block, 'layers': [3, 24, 36, 3]}
     }
 
     if resnet_depth not in model_params:
diff --git a/official/vision/detection/modeling/base_model.py b/official/vision/detection/modeling/base_model.py
index 468a14fd447d3428296ca4840b248fd0a3df6f53..a730ef5cb5c8a6cf573777d12b72cba3484ed0a7 100644
--- a/official/vision/detection/modeling/base_model.py
+++ b/official/vision/detection/modeling/base_model.py
@@ -93,6 +93,11 @@ class Model(object):
   def __init__(self, params):
     self._use_bfloat16 = params.architecture.use_bfloat16
 
+    if params.architecture.use_bfloat16:
+      policy = tf.compat.v2.keras.mixed_precision.experimental.Policy(
+          'mixed_bfloat16')
+      tf.compat.v2.keras.mixed_precision.experimental.set_policy(policy)
+
     # Optimization.
     self._optimizer_fn = OptimizerFactory(params.train.optimizer)
     self._learning_rate = learning_rates.learning_rate_generator(
diff --git a/official/vision/detection/ops/sampling_ops.py b/official/vision/detection/ops/sampling_ops.py
index 76e04357f461aba3bb488e0322318cc22eadcc2d..1777b9da7b620e5b3e5089a93e68660226644a2b 100644
--- a/official/vision/detection/ops/sampling_ops.py
+++ b/official/vision/detection/ops/sampling_ops.py
@@ -83,7 +83,7 @@ def box_matching(boxes, gt_boxes, gt_classes):
   matched_gt_boxes = tf.gather_nd(gt_boxes, gather_nd_indices)
   matched_gt_boxes = tf.where(
       tf.tile(tf.expand_dims(background_box_mask, axis=-1), [1, 1, 4]),
-      tf.zeros_like(matched_gt_boxes, dtype=tf.float32),
+      tf.zeros_like(matched_gt_boxes, dtype=matched_gt_boxes.dtype),
       matched_gt_boxes)
 
   matched_gt_classes = tf.gather_nd(gt_classes, gather_nd_indices)
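The sampling_ops.py fix matters once base_model.py sets the `mixed_bfloat16` policy: intermediate tensors may then be bfloat16, and `tf.where` requires both branches to share a dtype. A minimal repro sketch, independent of this codebase, with illustrative shapes:

    import tensorflow as tf

    matched_gt_boxes = tf.zeros([2, 8, 4], dtype=tf.bfloat16)
    # Hard-coding dtype=tf.float32 here would make tf.where fail under the
    # bfloat16 policy; deriving it from the input keeps the branches matched.
    zeros = tf.zeros_like(matched_gt_boxes, dtype=matched_gt_boxes.dtype)
    background_mask = tf.zeros([2, 8, 1], dtype=tf.bool)
    out = tf.where(
        tf.tile(background_mask, [1, 1, 4]), zeros, matched_gt_boxes)
    assert out.dtype == tf.bfloat16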