Commit 6b1b429d authored by A. Unique TensorFlower, committed by TensorFlower Gardener

Adds an ExtractImagePatchesOp that performs im2col. Forward only implementation for now.  Fixes #2605.
Change: 123983077
Parent 88cabd66
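A minimal usage sketch of the new op (assuming the generated Python wrapper tf.extract_image_patches; the values mirror the 2x2-kernel VALID-padding test added in this change):

import numpy as np
import tensorflow as tf

# A 1x2x2x1 image holding the values 1..4.
image = np.arange(1, 5, dtype=np.float32).reshape([1, 2, 2, 1])
# Extract all 2x2 patches with stride 1 and no dilation.
patches = tf.extract_image_patches(tf.constant(image),
                                   ksizes=[1, 2, 2, 1],
                                   strides=[1, 1, 1, 1],
                                   rates=[1, 1, 1, 1],
                                   padding="VALID")
with tf.Session() as sess:
  print(sess.run(patches))  # [[[[1. 2. 3. 4.]]]], shape [1, 1, 1, 4]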
@@ -71,6 +71,19 @@ cc_library(
],
)
tf_kernel_library(
name = "extract_image_patches_op",
prefix = "extract_image_patches_op",
deps = [
":bounds_check",
":eigen_helpers",
":ops_util",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//third_party/eigen3",
],
)
cc_library(
name = "conv_3d",
hdrs = ["conv_3d.h"],
@@ -333,6 +346,7 @@ tf_kernel_libraries(
":concat_lib",
":cuda_device_array",
":depth_space_ops",
":extract_image_patches_op",
":fill_functor",
":ops_util",
":spacetobatch_op",
......
/* Copyright 2015 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// See docs in ../ops/image_ops.cc.
#define USE_EIGEN_TENSOR
#define EIGEN_USE_THREADS
#include "tensorflow/core/kernels/extract_image_patches_op.h"
#include <vector>
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/kernels/bounds_check.h"
#include "tensorflow/core/kernels/ops_util.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/util/tensor_format.h"
namespace tensorflow {
typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;
static inline void ParseAttributeVec4(OpKernelConstruction* context,
const string& attr_name,
std::vector<int32>* attr) {
OP_REQUIRES_OK(context, context->GetAttr(attr_name, attr));
OP_REQUIRES(
context, (*attr)[0] == 1 && (*attr)[3] == 1,
errors::Unimplemented("Only support ", attr_name, " across space."));
OP_REQUIRES(context, (*attr)[1] >= 1 && (*attr)[2] >= 1,
errors::OutOfRange(attr_name, " is out of range."));
}
template <typename Device, typename T>
class ExtractImagePatchesOp : public UnaryOp<T> {
public:
explicit ExtractImagePatchesOp(OpKernelConstruction* context)
: UnaryOp<T>(context) {
ParseAttributeVec4(context, "ksizes", &ksizes_);
ParseAttributeVec4(context, "strides", &strides_);
ParseAttributeVec4(context, "rates", &rates_);
OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
}
void Compute(OpKernelContext* context) override {
// Input tensor is of the following dimensions:
// [ batch, in_rows, in_cols, channels ]
const Tensor& input = context->input(0);
OP_REQUIRES(context, input.dims() == 4,
errors::InvalidArgument("input must be 4-dimensional",
input.shape().DebugString()));
const int batch = input.dim_size(0);
const int in_rows = input.dim_size(1);
const int in_cols = input.dim_size(2);
const int depth = input.dim_size(3);
const int ksize_rows = ksizes_[1];
const int ksize_cols = ksizes_[2];
const int stride_rows = strides_[1];
const int stride_cols = strides_[2];
const int rate_rows = rates_[1];
const int rate_cols = rates_[2];
const int ksize_rows_eff = ksize_rows + (ksize_rows - 1) * (rate_rows - 1);
const int ksize_cols_eff = ksize_cols + (ksize_cols - 1) * (rate_cols - 1);
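// For example, with ksize_rows = 3 and rate_rows = 2, the dilated kernel
// spans ksize_rows_eff = 3 + (3 - 1) * (2 - 1) = 5 input rows.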
int out_rows = 0, out_cols = 0;
int pad_rows = 0, pad_cols = 0;
OP_REQUIRES_OK(context, Get2dOutputSize(in_rows, in_cols, ksize_rows_eff,
ksize_cols_eff, stride_rows,
stride_cols, padding_, &out_rows,
&out_cols, &pad_rows, &pad_cols));
const std::vector<int64> out_sizes = {batch, out_rows, out_cols,
ksize_rows * ksize_cols * depth};
TensorShape out_shape(out_sizes);
Tensor* output = nullptr;
OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
// If there is nothing to compute, return.
if (out_shape.num_elements() == 0) {
return;
}
functor::ExtractImagePatchesForward<Device, T>()(
context->eigen_device<Device>(), input.tensor<T, 4>(), ksize_rows,
ksize_cols, stride_rows, stride_cols, rate_rows, rate_cols,
BrainPadding2EigenPadding(padding_), output->tensor<T, 4>());
}
private:
std::vector<int32> ksizes_;
std::vector<int32> strides_;
std::vector<int32> rates_;
Padding padding_;
TF_DISALLOW_COPY_AND_ASSIGN(ExtractImagePatchesOp);
};
// Registration of the CPU implementations.
#define REGISTER(T) \
REGISTER_KERNEL_BUILDER( \
Name("ExtractImagePatches").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
ExtractImagePatchesOp<CPUDevice, T>);
TF_CALL_REAL_NUMBER_TYPES(REGISTER);
#undef REGISTER
#if GOOGLE_CUDA
// Forward declarations of the functor specializations for GPU.
namespace functor {
#define DECLARE_GPU_SPEC(T) \
template <> \
void ExtractImagePatchesForward<GPUDevice, T>::operator()( \
const GPUDevice& d, typename TTypes<T, 4>::ConstTensor input, \
int patch_rows, int patch_cols, int stride_rows, int stride_cols, \
int rate_rows, int rate_cols, const Eigen::PaddingType& padding, \
typename TTypes<T, 4>::Tensor output); \
extern template struct ExtractImagePatchesForward<GPUDevice, T>;
TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC);
#undef DECLARE_GPU_SPEC
} // namespace functor
// Registration of the GPU implementations.
#define REGISTER(T) \
REGISTER_KERNEL_BUILDER( \
Name("ExtractImagePatches").Device(DEVICE_GPU).TypeConstraint<T>("T"), \
ExtractImagePatchesOp<GPUDevice, T>);
TF_CALL_GPU_NUMBER_TYPES(REGISTER);
#undef REGISTER
#endif // GOOGLE_CUDA
} // namespace tensorflow
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_KERNELS_EXTRACT_IMAGE_PATCHES_OP_H_
#define TENSORFLOW_KERNELS_EXTRACT_IMAGE_PATCHES_OP_H_
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_types.h"
namespace tensorflow {
namespace functor {
template <typename Device, typename T>
struct ExtractImagePatchesForward {
void operator()(const Device& d, typename TTypes<T, 4>::ConstTensor input,
int patch_rows, int patch_cols, int stride_rows,
int stride_cols, int rate_rows, int rate_cols,
const Eigen::PaddingType& padding,
typename TTypes<T, 4>::Tensor output) {
// Need to swap row/col when calling Eigen, because our data is in
// NHWC format while Eigen assumes NWHC format.
To32Bit(output).device(d) =
To32Bit(input)
.extract_image_patches(patch_cols, patch_rows, stride_cols,
stride_rows, rate_cols, rate_rows, padding)
.reshape(output.dimensions());
}
};
} // namespace functor
} // namespace tensorflow
#endif // TENSORFLOW_KERNELS_EXTRACT_IMAGE_PATCHES_OP_H_
/* Copyright 2015 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#if GOOGLE_CUDA
#define EIGEN_USE_GPU
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/kernels/extract_image_patches_op.h"
namespace tensorflow {
typedef Eigen::GpuDevice GPUDevice;
namespace functor {
#define REGISTER(T) template struct ExtractImagePatchesForward<GPUDevice, T>;
TF_CALL_GPU_NUMBER_TYPES(REGISTER);
#undef REGISTER
} // end namespace functor
} // end namespace tensorflow
#endif // GOOGLE_CUDA
@@ -15,6 +15,7 @@ limitations under the License.
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/util/mirror_pad_mode.h"
#include "tensorflow/core/util/padding.h"
namespace tensorflow {
@@ -1703,6 +1704,42 @@ x = [[ [1], [2], [5], [6]],
block_size: The size of the spatial block, same as in Space2Depth.
)doc");
// --------------------------------------------------------------------------
REGISTER_OP("ExtractImagePatches")
.Input("images: T")
.Output("patches: T")
.Attr("ksizes: list(int) == 4")
.Attr("strides: list(int) == 4")
.Attr("rates: list(int) == 4")
.Attr("T: realnumbertype")
.Attr(GetPaddingAttrString())
.Doc(R"doc(
Extract `patches` from `images` and put them in the "depth" output dimension.
images: 4-D Tensor with shape `[batch, in_rows, in_cols, depth]`.
patches: 4-D Tensor with shape `[batch, out_rows, out_cols, ksize_rows *
ksize_cols * depth]` containing image patches with size
`ksize_rows x ksize_cols x depth` vectorized in the "depth" dimension.
ksizes: The size of the sliding window for each dimension of `images`.
strides: 1-D of length 4. How far the centers of two consecutive patches are in
the images. Must be: `[1, stride_rows, stride_cols, 1]`.
rates: 1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the
input stride, specifying how far two consecutive patch samples are in the
input. Equivalent to extracting patches with
`patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by
subsampling them spatially by a factor of `rates`.
padding: The type of padding algorithm to use.
We specify the size-related attributes as:
ksizes = [1, ksize_rows, ksize_cols, 1]
strides = [1, strides_rows, strides_cols, 1]
rates = [1, rates_rows, rates_cols, 1]
)doc");
// --------------------------------------------------------------------------
REGISTER_OP("Bitcast")
.Input("input: T")
.Output("output: type")
......
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functional tests for ExtractImagePatches op."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
class ExtractImagePatches(tf.test.TestCase):
"""Functional tests for ExtractImagePatches op."""
def _VerifyValues(self, image, ksizes, strides, rates, padding, patches):
"""Tests input-output pairs for the ExtractImagePatches op.
Args:
image: Input tensor with shape: [batch, in_rows, in_cols, depth].
ksizes: Patch size specified as: [ksize_rows, ksize_cols].
strides: Output strides, specified as [stride_rows, stride_cols].
rates: Atrous rates, specified as [rate_rows, rate_cols].
padding: Padding type.
patches: Expected output.
"""
ksizes = [1] + ksizes + [1]
strides = [1] + strides + [1]
rates = [1] + rates + [1]
for use_gpu in [False, True]:
with self.test_session(use_gpu=use_gpu):
out_tensor = tf.extract_image_patches(
tf.constant(image),
ksizes=ksizes,
strides=strides,
rates=rates,
padding=padding,
name="im2col")
self.assertAllClose(patches, out_tensor.eval())
def testKsize1x1Stride1x1Rate1x1(self):
"""Verifies that for 1x1 kernel the output equals the input."""
# [2, 3, 4, 5]
image = np.reshape(range(120), [2, 3, 4, 5])
# [2, 3, 4, 5]
patches = np.reshape(range(120), [2, 3, 4, 5])
for padding in ["VALID", "SAME"]:
self._VerifyValues(image,
ksizes=[1, 1],
strides=[1, 1],
rates=[1, 1],
padding=padding,
patches=patches)
def testKsize1x1Stride2x3Rate1x1(self):
"""Test for 1x1 kernel and strides."""
# [2, 4, 5, 3]
image = np.reshape(range(120), [2, 4, 5, 3])
# [2, 2, 2, 3]
patches = image[:, ::2, ::3, :]
for padding in ["VALID", "SAME"]:
self._VerifyValues(image,
ksizes=[1, 1],
strides=[2, 3],
rates=[1, 1],
padding=padding,
patches=patches)
def testKsize2x2Stride1x1Rate1x1Valid(self):
"""Test for 1x1 kernel ."""
# [1, 2, 2, 1]
image = [[[[1], [2]], [[3], [4]]]]
# [1, 1, 1, 4]
patches = [[[[1, 2, 3, 4]]]]
self._VerifyValues(image,
ksizes=[2, 2],
strides=[1, 1],
rates=[1, 1],
padding="VALID",
patches=patches)
def testKsize2x2Stride1x1Rate1x1Same(self):
"""Test for 1x1 kernel ."""
# [1, 2, 2, 1]
image = [[[[1], [2]], [[3], [4]]]]
# [1, 2, 2, 4]
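# With SAME padding the 2x2 window can extend past the bottom/right edge of
# the image; out-of-bounds samples show up as zeros in the corresponding
# patch entries, which is why the expected patches below contain zeros.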
patches = [[[[1, 2, 3, 4], [2, 0, 4, 0]], [[3, 4, 0, 0], [4, 0, 0, 0]]]]
self._VerifyValues(image,
ksizes=[2, 2],
strides=[1, 1],
rates=[1, 1],
padding="SAME",
patches=patches)
if __name__ == "__main__":
tf.test.main()
@@ -54,6 +54,7 @@ or join multiple tensors together.
@@reverse_sequence
@@reverse
@@transpose
@@extract_image_patches
@@space_to_batch
@@batch_to_space
@@space_to_depth
@@ -1669,6 +1670,57 @@ def _QuantizeDequantizeShape(op):
return common_shapes.unchanged_shape(op)
@ops.RegisterShape("ExtractImagePatches")
def _ExtractImagePatchesShape(op):
"""Shape function for the ExtractImagePatches op.
Args:
op: An ExtractImagePatches op.
Raises:
ValueError: If the strides or padding are invalid.
Returns:
The shape of the op output.
"""
images_shape = op.inputs[0].get_shape().with_rank(4)
batch = images_shape[0]
in_rows = images_shape[1]
in_cols = images_shape[2]
in_depth = images_shape[3]
ksize_b, ksize_r, ksize_c, ksize_d = op.get_attr("ksizes")
if ksize_b != 1 or ksize_d != 1:
raise ValueError("Current implementation does not yet support "
"ksizes in the batch and depth dimensions.")
stride_b, stride_r, stride_c, stride_d = op.get_attr("strides")
if stride_b != 1 or stride_d != 1:
raise ValueError("Current implementation does not yet support "
"strides in the batch and depth dimensions.")
rate_b, rate_r, rate_c, rate_d = op.get_attr("rates")
if rate_b != 1 or rate_d != 1:
raise ValueError("Current implementation does not yet support "
"rates in the batch and depth dimensions.")
# Effective patch size, taking into account filter upsampling by rates.
ksize_r_eff = ksize_r + (ksize_r - 1) * (rate_r - 1)
ksize_c_eff = ksize_c + (ksize_c - 1) * (rate_c - 1)
padding = op.get_attr("padding")
out_rows, out_cols = common_shapes.get2d_conv_output_size(in_rows, in_cols,
ksize_r_eff,
ksize_c_eff,
stride_r, stride_c,
padding)
out_depth = None if in_depth is None else ksize_r * ksize_c * int(in_depth)
output_shape = [batch, out_rows, out_cols, out_depth]
return [tensor_shape.TensorShape(output_shape)]
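# For example, a [1, 5, 5, 1] image with ksizes = [1, 3, 3, 1],
# strides = [1, 1, 1, 1], rates = [1, 1, 1, 1] and "VALID" padding gives
# out_rows = out_cols = (5 - 3) // 1 + 1 = 3 and out_depth = 3 * 3 * 1 = 9,
# i.e. an output shape of [1, 3, 3, 9].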
@ops.RegisterShape("SpaceToBatch")
def _SpaceToBatchShape(op):
"""Shape function for the SpaceToBatch op.
@@ -2055,14 +2107,14 @@ def one_hot(indices, depth, on_value=None, off_value=None,
else None
off_dtype = ops.convert_to_tensor(off_value).dtype.base_dtype if off_exists\
else None
if on_exists or off_exists:
if dtype is not None:
# Ensure provided on_value and/or off_value match dtype
if (on_exists and on_dtype != dtype):
raise TypeError("dtype {0} of on_value does not match " \
"dtype parameter {1}".format(on_dtype, dtype))
if (off_exists and off_dtype != dtype):
raise TypeError("dtype {0} of off_value does not match " \
"dtype parameter {1}".format(off_dtype, dtype))
else:
@@ -2071,7 +2123,7 @@ def one_hot(indices, depth, on_value=None, off_value=None,
elif dtype is None:
# None of on_value, off_value, or dtype provided. Default dtype to float32
dtype = dtypes.float32
if not on_exists:
# on_value not provided: assign to value 1 of type dtype
on_value = ops.convert_to_tensor(1, dtype, name="on_value")
@@ -2085,8 +2137,8 @@ def one_hot(indices, depth, on_value=None, off_value=None,
raise TypeError("dtype {0} of on_value does not match " \
"dtype {1} of off_value".format(on_dtype, off_dtype))
return gen_array_ops._one_hot(indices, depth, on_value,
off_value, axis, name)
return gen_array_ops._one_hot(indices, depth, on_value, off_value, axis,
name)
@ops.RegisterShape("OneHot")
......