提交 01a6f5e5 编写于 作者: Xiaoqiang Zheng 提交者: TensorFlower Gardener

Multiple layout support for pooling operations.

Change: 115611259
上级 cdd0f2ee
......@@ -33,6 +33,7 @@ limitations under the License.
#include "tensorflow/core/lib/gtl/array_slice.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/util/padding.h"
#include "tensorflow/core/util/tensor_format.h"
#if GOOGLE_CUDA
#include "tensorflow/core/kernels/maxpooling_op_gpu.h"
......@@ -48,6 +49,13 @@ template <typename Device, typename T>
class AvgPoolingOp : public UnaryOp<T> {
public:
explicit AvgPoolingOp(OpKernelConstruction* context) : UnaryOp<T>(context) {
string data_format;
OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
errors::InvalidArgument("Invalid data format"));
OP_REQUIRES(
context, data_format_ == FORMAT_NHWC,
errors::InvalidArgument("Default AvgPoolingOp only supports NHWC."));
OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
OP_REQUIRES(context, ksize_.size() == 4,
errors::InvalidArgument("Sliding window ksize field must "
......@@ -64,8 +72,8 @@ class AvgPoolingOp : public UnaryOp<T> {
void Compute(OpKernelContext* context) override {
const Tensor& tensor_in = context->input(0);
PoolParameters params{context, ksize_, stride_, padding_,
tensor_in.shape()};
PoolParameters params{context, ksize_, stride_,
padding_, data_format_, tensor_in.shape()};
if (!context->status().ok()) {
return;
}
......@@ -81,21 +89,14 @@ class AvgPoolingOp : public UnaryOp<T> {
OP_REQUIRES_OK(context, context->allocate_output(
0, params.forward_output_shape(), &output));
if (std::is_same<Device, GPUDevice>::value) {
Eigen::PaddingType pt = BrainPadding2EigenPadding(padding_);
functor::SpatialAvgPooling<Device, T>()(
context->eigen_device<Device>(), output->tensor<T, 4>(),
tensor_in.tensor<T, 4>(), params.window_rows, params.window_cols,
params.row_stride, params.col_stride, pt);
} else {
SpatialAvgPool<Device, T>(context, output, tensor_in, params, padding_);
}
SpatialAvgPool<Device, T>(context, output, tensor_in, params, padding_);
}
private:
std::vector<int32> ksize_;
std::vector<int32> stride_;
Padding padding_;
TensorFormat data_format_;
};
REGISTER_KERNEL_BUILDER(Name("AvgPool")
......@@ -104,6 +105,71 @@ REGISTER_KERNEL_BUILDER(Name("AvgPool")
AvgPoolingOp<CPUDevice, float>);
#if GOOGLE_CUDA
template <typename T>
class AvgPoolingOp<GPUDevice, T> : public UnaryOp<T> {
 public:
  typedef GPUDevice Device;

  // Validates the op attributes at construction time: data_format must parse
  // via FormatFromString, ksize/strides must each specify 4 dimensions, and
  // pooling across the batch ('N') dimension is rejected up front.
  explicit AvgPoolingOp(OpKernelConstruction* context) : UnaryOp<T>(context) {
    string data_format;
    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
                errors::InvalidArgument("Invalid data format"));
    OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
    OP_REQUIRES(context, ksize_.size() == 4,
                errors::InvalidArgument("Sliding window ksize field must "
                                        "specify 4 dimensions"));
    OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_));
    OP_REQUIRES(context, stride_.size() == 4,
                errors::InvalidArgument("Sliding window stride field must "
                                        "specify 4 dimensions"));
    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
    // The batch entry of ksize/stride sits at a different index depending on
    // data_format (NHWC vs NCHW), so look it up by dimension label.
    const int32 ksize_n = GetTensorDim(ksize_, data_format_, 'N');
    const int32 stride_n = GetTensorDim(stride_, data_format_, 'N');
    OP_REQUIRES(context, ksize_n == 1 && stride_n == 1,
                errors::Unimplemented(
                    "Pooling is not yet supported on the batch dimension."));
  }

  // GPU average pooling. NCHW inputs are dispatched to the cudnn-backed
  // DnnPoolingOp (which allocates its own output); NHWC inputs use the
  // Eigen SpatialAvgPooling functor directly.
  void Compute(OpKernelContext* context) override {
    const Tensor& tensor_in = context->input(0);
    PoolParameters params{context, ksize_, stride_,
                          padding_, data_format_, tensor_in.shape()};
    // PoolParameters reports invalid inputs through context->status().
    if (!context->status().ok()) {
      return;
    }
    OP_REQUIRES(context, params.depth_window == 1,
                errors::Unimplemented("Non-spatial pooling is not "
                                      "yet supported. Volunteers? :)"));

    // For avgpooling, tensor_in should have 4 dimensions.
    OP_REQUIRES(context, tensor_in.dims() == 4,
                errors::InvalidArgument("tensor_in must be 4-dimensional"));

    TensorShape output_shape = params.forward_output_shape();

    if (data_format_ == FORMAT_NCHW) {
      DnnPoolingOp<T>::Compute(
          context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_,
          stride_, padding_, data_format_, tensor_in, output_shape);
    } else {
      Tensor* output = nullptr;
      OP_REQUIRES_OK(context,
                     context->allocate_output(0, output_shape, &output));
      Eigen::PaddingType pt = BrainPadding2EigenPadding(padding_);
      functor::SpatialAvgPooling<Device, T>()(
          context->eigen_device<Device>(), output->tensor<T, 4>(),
          tensor_in.tensor<T, 4>(), params.window_rows, params.window_cols,
          params.row_stride, params.col_stride, pt);
    }
  }

 private:
  std::vector<int32> ksize_;   // pooling window size, one entry per dimension
  std::vector<int32> stride_;  // window stride, one entry per dimension
  Padding padding_;            // SAME or VALID
  TensorFormat data_format_;   // NHWC or NCHW
};
// Forward declarations of the functor specializations for GPU.
namespace functor {
#define DECLARE_GPU_SPEC(T) \
......@@ -134,6 +200,13 @@ template <typename Device, class T>
class AvgPoolingGradOp : public OpKernel {
public:
explicit AvgPoolingGradOp(OpKernelConstruction* context) : OpKernel(context) {
string data_format;
OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
errors::InvalidArgument("Invalid data format"));
OP_REQUIRES(context, data_format_ == FORMAT_NHWC,
errors::InvalidArgument(
"Default AvgPoolingGradOp only supports NHWC."));
OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
OP_REQUIRES(context, ksize_.size() == 4,
errors::InvalidArgument("Sliding window ksize field must "
......@@ -249,6 +322,7 @@ class AvgPoolingGradOp : public OpKernel {
std::vector<int32> ksize_;
std::vector<int32> stride_;
Padding padding_;
TensorFormat data_format_;
};
REGISTER_KERNEL_BUILDER(Name("AvgPoolGrad")
......@@ -272,6 +346,10 @@ class AvgPoolingGradOp<GPUDevice, T> : public OpKernel {
typedef GPUDevice Device;
explicit AvgPoolingGradOp(OpKernelConstruction* context) : OpKernel(context) {
string data_format;
OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
errors::InvalidArgument("Invalid data format"));
OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
OP_REQUIRES(context, ksize_.size() == 4,
errors::InvalidArgument("Sliding window ksize field must "
......@@ -281,7 +359,9 @@ class AvgPoolingGradOp<GPUDevice, T> : public OpKernel {
errors::InvalidArgument("Sliding window strides field must "
"specify 4 dimensions"));
OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1,
const int32 ksize_n = GetTensorDim(ksize_, data_format_, 'N');
const int32 stride_n = GetTensorDim(stride_, data_format_, 'N');
OP_REQUIRES(context, ksize_n == 1 && stride_n == 1,
errors::Unimplemented(
"Pooling is not yet supported on the batch dimension."));
}
......@@ -307,13 +387,15 @@ class AvgPoolingGradOp<GPUDevice, T> : public OpKernel {
DnnPoolingGradOp<T>::Compute(
context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_,
stride_, padding_, nullptr, nullptr, out_backprop, output_shape);
stride_, padding_, data_format_, nullptr, nullptr, out_backprop,
output_shape);
}
private:
std::vector<int32> ksize_;
std::vector<int32> stride_;
Padding padding_;
TensorFormat data_format_;
};
REGISTER_KERNEL_BUILDER(Name("AvgPoolGrad")
......@@ -332,6 +414,10 @@ class AvgPoolingGradOpCustomGPUKernel : public OpKernel {
explicit AvgPoolingGradOpCustomGPUKernel(OpKernelConstruction* context)
: OpKernel(context) {
string data_format;
OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
errors::InvalidArgument("Invalid data format"));
OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
OP_REQUIRES(context, ksize_.size() == 4,
errors::InvalidArgument("Sliding window ksize field must "
......@@ -341,7 +427,9 @@ class AvgPoolingGradOpCustomGPUKernel : public OpKernel {
errors::InvalidArgument("Sliding window strides field must "
"specify 4 dimensions"));
OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1,
const int32 ksize_n = GetTensorDim(ksize_, data_format_, 'N');
const int32 stride_n = GetTensorDim(stride_, data_format_, 'N');
OP_REQUIRES(context, ksize_n == 1 && stride_n == 1,
errors::Unimplemented(
"Pooling is not yet supported on the batch dimension."));
}
......@@ -358,63 +446,73 @@ class AvgPoolingGradOpCustomGPUKernel : public OpKernel {
// For avgpooling, out_backprop should have 4 dimensions.
OP_REQUIRES(context, out_backprop.dims() == 4,
errors::InvalidArgument("out_backprop must be 4-dimensional"));
const int64 out_backprop_batch = out_backprop.dim_size(0);
const int64 out_backprop_rows = out_backprop.dim_size(1);
const int64 out_backprop_cols = out_backprop.dim_size(2);
const int64 out_backprop_depth = out_backprop.dim_size(3);
TensorShape output_shape;
auto shape_vec = tensor_in_shape.vec<int32>();
for (int64 i = 0; i < tensor_in_shape.NumElements(); ++i) {
output_shape.AddDim(shape_vec(i));
}
const int64 in_rows = output_shape.dim_size(1);
const int64 in_cols = output_shape.dim_size(2);
Tensor* output = nullptr;
OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output));
const int window_rows = ksize_[1];
const int window_cols = ksize_[2];
const int depth_window = ksize_[3];
const int row_stride = stride_[1];
const int col_stride = stride_[2];
// We (will) use different code for spatial pooling and
// non-spatial pooling.
//
// Spatial pooling is when depth_window = 1
OP_REQUIRES(context, depth_window == 1,
errors::Unimplemented("Non-spatial pooling is not "
"yet supported. Volunteers? :)"));
int out_height, out_width, pad_rows, pad_cols;
OP_REQUIRES_OK(
context, Get2dOutputSize(in_rows, in_cols, window_rows, window_cols,
row_stride, col_stride, padding_, &out_height,
&out_width, &pad_rows, &pad_cols));
RunAvePoolBackwardNHWC<T>(out_backprop.flat<T>().data(), // top_diff
out_backprop_batch, // num
in_rows, // height
in_cols, // width
out_backprop_depth, // channels
out_backprop_rows, // pooled_height
out_backprop_cols, // pooled_width
window_rows, // kernel_h
window_cols, // kernel_w
row_stride, // stride_h
col_stride, // stride_w
pad_rows, // pad_t
pad_cols, // pad_l
output->flat<T>().data(), // bottom_diff
context->eigen_gpu_device()); // d
if (data_format_ == FORMAT_NHWC) {
const int64 out_backprop_batch = out_backprop.dim_size(0);
const int64 out_backprop_rows = out_backprop.dim_size(1);
const int64 out_backprop_cols = out_backprop.dim_size(2);
const int64 out_backprop_depth = out_backprop.dim_size(3);
const int64 in_rows = output_shape.dim_size(1);
const int64 in_cols = output_shape.dim_size(2);
Tensor* output = nullptr;
OP_REQUIRES_OK(context,
context->allocate_output(0, output_shape, &output));
const int window_rows = ksize_[1];
const int window_cols = ksize_[2];
const int depth_window = ksize_[3];
const int row_stride = stride_[1];
const int col_stride = stride_[2];
// We (will) use different code for spatial pooling and
// non-spatial pooling.
//
// Spatial pooling is when depth_window = 1
OP_REQUIRES(context, depth_window == 1,
errors::Unimplemented("Non-spatial pooling is not "
"yet supported. Volunteers? :)"));
int out_height, out_width, pad_rows, pad_cols;
OP_REQUIRES_OK(context, Get2dOutputSize(
in_rows, in_cols, window_rows, window_cols,
row_stride, col_stride, padding_, &out_height,
&out_width, &pad_rows, &pad_cols));
RunAvePoolBackwardNHWC<T>(out_backprop.flat<T>().data(), // top_diff
out_backprop_batch, // num
in_rows, // height
in_cols, // width
out_backprop_depth, // channels
out_backprop_rows, // pooled_height
out_backprop_cols, // pooled_width
window_rows, // kernel_h
window_cols, // kernel_w
row_stride, // stride_h
col_stride, // stride_w
pad_rows, // pad_t
pad_cols, // pad_l
output->flat<T>().data(), // bottom_diff
context->eigen_gpu_device()); // d
} else {
DnnPoolingGradOp<T>::Compute(
context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_,
stride_, padding_, data_format_, nullptr, nullptr, out_backprop,
output_shape);
}
}
private:
std::vector<int32> ksize_;
std::vector<int32> stride_;
Padding padding_;
TensorFormat data_format_;
};
REGISTER_KERNEL_BUILDER(Name("AvgPoolGrad")
......
......@@ -34,6 +34,7 @@ limitations under the License.
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/gtl/array_slice.h"
#include "tensorflow/core/util/padding.h"
#include "tensorflow/core/util/tensor_format.h"
#include "tensorflow/core/util/use_cudnn.h"
#if GOOGLE_CUDA
......@@ -170,6 +171,13 @@ template <class Device, class T>
class MaxPoolingGradOp : public OpKernel {
public:
explicit MaxPoolingGradOp(OpKernelConstruction* context) : OpKernel(context) {
string data_format;
OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
errors::InvalidArgument("Invalid data format"));
OP_REQUIRES(context, data_format_ == FORMAT_NHWC,
errors::InvalidArgument(
"Default MaxPoolinGradgOp only supports NHWC."));
OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
OP_REQUIRES(context, ksize_.size() == 4,
errors::InvalidArgument("Sliding window ksize field must "
......@@ -215,8 +223,8 @@ class MaxPoolingGradOp : public OpKernel {
tensor_out.shape(),
&tensor_out_arg_max));
PoolParameters params{context, ksize_, stride_, padding_,
tensor_in.shape()};
PoolParameters params{context, ksize_, stride_,
padding_, FORMAT_NHWC, tensor_in.shape()};
if (!context->status().ok()) {
return;
}
......@@ -250,6 +258,7 @@ class MaxPoolingGradOp : public OpKernel {
std::vector<int32> ksize_;
std::vector<int32> stride_;
Padding padding_;
TensorFormat data_format_;
};
REGISTER_KERNEL_BUILDER(Name("MaxPoolGrad").Device(DEVICE_CPU),
......@@ -266,7 +275,8 @@ static void MaxPoolingBackwardCustomKernel(
OP_REQUIRES_OK(context,
context->allocate_output(0, tensor_in_shape, &output));
PoolParameters params{context, size, stride, padding, tensor_in_shape};
PoolParameters params{context, size, stride,
padding, FORMAT_NHWC, tensor_in_shape};
if (!context->status().ok()) {
return;
}
......@@ -286,6 +296,10 @@ class MaxPoolingGradOp<Eigen::GpuDevice, T> : public OpKernel {
typedef Eigen::GpuDevice Device;
explicit MaxPoolingGradOp(OpKernelConstruction* context) : OpKernel(context) {
string data_format;
OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
errors::InvalidArgument("Invalid data format"));
OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
OP_REQUIRES(context, ksize_.size() == 4,
errors::InvalidArgument("Sliding window ksize field must "
......@@ -295,7 +309,9 @@ class MaxPoolingGradOp<Eigen::GpuDevice, T> : public OpKernel {
errors::InvalidArgument("Sliding window strides field must "
"specify 4 dimensions"));
OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1,
const int32 ksize_n = GetTensorDim(ksize_, data_format_, 'N');
const int32 stride_n = GetTensorDim(stride_, data_format_, 'N');
OP_REQUIRES(context, ksize_n == 1 && stride_n == 1,
errors::Unimplemented(
"Pooling is not yet supported on the batch dimension."));
......@@ -321,9 +337,11 @@ class MaxPoolingGradOp<Eigen::GpuDevice, T> : public OpKernel {
if (use_dnn_) {
DnnPoolingGradOp<T>::Compute(
context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize_,
stride_, padding_, &tensor_in, &tensor_out, out_backprop,
output_shape);
stride_, padding_, data_format_, &tensor_in, &tensor_out,
out_backprop, output_shape);
} else {
CHECK(data_format_ == FORMAT_NHWC)
<< "Non-Cudnn MaxPoolGrad only supports NHWC format";
MaxPoolingBackwardCustomKernel(context, ksize_, stride_, padding_,
&tensor_in, out_backprop, output_shape);
}
......@@ -333,6 +351,7 @@ class MaxPoolingGradOp<Eigen::GpuDevice, T> : public OpKernel {
std::vector<int32> ksize_;
std::vector<int32> stride_;
Padding padding_;
TensorFormat data_format_;
bool use_dnn_;
};
......@@ -349,6 +368,13 @@ class MaxPoolingNoMaskOp : public OpKernel {
public:
explicit MaxPoolingNoMaskOp(OpKernelConstruction* context)
: OpKernel(context) {
string data_format;
OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
errors::InvalidArgument("Invalid data format"));
OP_REQUIRES(context, data_format_ == FORMAT_NHWC,
errors::InvalidArgument(
"Default MaxPoolingNoMaskOp only supports NHWC."));
OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
OP_REQUIRES(context, ksize_.size() == 4,
errors::InvalidArgument("Sliding window ksize field must "
......@@ -366,8 +392,8 @@ class MaxPoolingNoMaskOp : public OpKernel {
void Compute(OpKernelContext* context) override {
const Tensor& tensor_in = context->input(0);
PoolParameters params{context, ksize_, stride_, padding_,
tensor_in.shape()};
PoolParameters params{context, ksize_, stride_,
padding_, data_format_, tensor_in.shape()};
if (!context->status().ok()) {
return;
}
......@@ -385,6 +411,7 @@ class MaxPoolingNoMaskOp : public OpKernel {
std::vector<int32> ksize_;
std::vector<int32> stride_;
Padding padding_;
TensorFormat data_format_;
};
template <typename Device, typename T>
......@@ -412,8 +439,8 @@ class MaxPoolingWithArgmaxOp : public OpKernel {
void Compute(OpKernelContext* context) override {
const Tensor& tensor_in = context->input(0);
PoolParameters params{context, ksize_, stride_, padding_,
tensor_in.shape()};
PoolParameters params{context, ksize_, stride_,
padding_, FORMAT_NHWC, tensor_in.shape()};
if (!context->status().ok()) {
return;
}
......@@ -462,8 +489,8 @@ class MaxPoolingGradWithArgmaxOp : public OpKernel {
const Tensor& grad_in = context->input(1);
const Tensor& argmax = context->input(2);
PoolParameters params{context, ksize_, stride_, padding_,
tensor_in.shape()};
PoolParameters params{context, ksize_, stride_,
padding_, FORMAT_NHWC, tensor_in.shape()};
if (!context->status().ok()) {
return;
}
......@@ -484,6 +511,66 @@ class MaxPoolingGradWithArgmaxOp : public OpKernel {
};
#if GOOGLE_CUDA
template <typename T>
class MaxPoolingNoMaskOp<GPUDevice, T> : public OpKernel {
 public:
  typedef GPUDevice Device;

  // Validates the op attributes at construction time: data_format must parse
  // via FormatFromString, ksize/strides must each specify 4 dimensions, and
  // pooling across the batch ('N') dimension is rejected up front. Also
  // records whether cudnn is available for this process.
  explicit MaxPoolingNoMaskOp(OpKernelConstruction* context)
      : OpKernel(context) {
    string data_format;
    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
                errors::InvalidArgument("Invalid data format"));
    OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
    OP_REQUIRES(context, ksize_.size() == 4,
                errors::InvalidArgument("Sliding window ksize field must "
                                        "specify 4 dimensions"));
    OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_));
    OP_REQUIRES(context, stride_.size() == 4,
                errors::InvalidArgument("Sliding window stride field must "
                                        "specify 4 dimensions"));
    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
    // Batch entry position depends on data_format, so look it up by label.
    const int32 ksize_n = GetTensorDim(ksize_, data_format_, 'N');
    const int32 stride_n = GetTensorDim(stride_, data_format_, 'N');
    OP_REQUIRES(context, ksize_n == 1 && stride_n == 1,
                errors::Unimplemented(
                    "Pooling is not yet supported on the batch dimension."));
    use_dnn_ = CanUseCudnn();
  }

  // GPU max pooling (no argmax output). NCHW inputs require the cudnn path;
  // the custom LaunchMaxPoolingNoMask kernel only handles NHWC, which is
  // enforced by the CHECK in the else branch.
  void Compute(OpKernelContext* context) override {
    const Tensor& tensor_in = context->input(0);
    PoolParameters params{context, ksize_, stride_,
                          padding_, data_format_, tensor_in.shape()};
    // PoolParameters reports invalid inputs through context->status().
    if (!context->status().ok()) {
      return;
    }

    // Build the output shape in the same layout as the input.
    TensorShape out_shape =
        ShapeFromFormat(data_format_, params.tensor_in_batch, params.out_height,
                        params.out_width, params.depth);
    if (use_dnn_ && data_format_ == FORMAT_NCHW) {
      // cudnn path; DnnPoolingOp allocates the output itself.
      DnnPoolingOp<T>::Compute(
          context, perftools::gputools::dnn::PoolingMode::kMaximum, ksize_,
          stride_, padding_, data_format_, tensor_in, out_shape);
    } else {
      CHECK(data_format_ == FORMAT_NHWC)
          << "Non-Cudnn MaxPool only supports NHWC format";
      Tensor* output = nullptr;
      OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
      LaunchMaxPoolingNoMask<Device, T>::launch(context, params, tensor_in,
                                                output);
    }
  }

 private:
  std::vector<int32> ksize_;   // pooling window size, one entry per dimension
  std::vector<int32> stride_;  // window stride, one entry per dimension
  Padding padding_;            // SAME or VALID
  TensorFormat data_format_;   // NHWC or NCHW
  bool use_dnn_;               // true when cudnn is available
};
template <typename T>
struct LaunchMaxPoolingNoMask<Eigen::GpuDevice, T> {
......
......@@ -32,22 +32,23 @@ namespace tensorflow {
PoolParameters::PoolParameters(OpKernelContext* context,
const std::vector<int32>& ksize,
const std::vector<int32>& stride,
Padding padding,
Padding padding, TensorFormat data_format,
const TensorShape& tensor_in_shape) {
// For maxpooling, tensor_in should have 4 dimensions.
OP_REQUIRES(context, tensor_in_shape.dims() == 4,
errors::InvalidArgument("tensor_in must be 4-dimensional"));
depth = tensor_in_shape.dim_size(3);
tensor_in_cols = tensor_in_shape.dim_size(2);
tensor_in_rows = tensor_in_shape.dim_size(1);
tensor_in_batch = tensor_in_shape.dim_size(0);
window_rows = ksize[1];
window_cols = ksize[2];
depth_window = ksize[3];
row_stride = stride[1];
col_stride = stride[2];
depth_stride = stride[3];
this->data_format = data_format;
depth = GetTensorDim(tensor_in_shape, data_format, 'C');
tensor_in_cols = GetTensorDim(tensor_in_shape, data_format, 'W');
tensor_in_rows = GetTensorDim(tensor_in_shape, data_format, 'H');
tensor_in_batch = GetTensorDim(tensor_in_shape, data_format, 'N');
window_rows = GetTensorDim(ksize, data_format, 'H');
window_cols = GetTensorDim(ksize, data_format, 'W');
depth_window = GetTensorDim(ksize, data_format, 'C');
row_stride = GetTensorDim(stride, data_format, 'H');
col_stride = GetTensorDim(stride, data_format, 'W');
depth_stride = GetTensorDim(stride, data_format, 'C');
// We only support 2D pooling across width/height and depthwise
// pooling, not a combination.
......@@ -91,7 +92,8 @@ PoolParameters::PoolParameters(OpKernelContext* context,
TensorShape PoolParameters::forward_output_shape() {
if (depth_window == 1) {
// Spatial pooling
return TensorShape({tensor_in_batch, out_height, out_width, depth});
return ShapeFromFormat(data_format, tensor_in_batch, out_height, out_width,
depth);
} else {
// Depthwise pooling
return TensorShape(
......@@ -126,23 +128,118 @@ DECLARE_GPU_SPEC(float);
#undef DECLARE_GPU_SPEC
} // namespace functor
template <typename T>
// Runs a forward pooling pass (max or average, selected by pooling_mode)
// through cudnn via the StreamExecutor API.
//
// cudnn only accepts NCHW data, so NHWC inputs are transposed into a
// temporary NCHW tensor before the call and the result is transposed back
// into the caller-visible NHWC output afterwards. NCHW inputs are passed
// through without copies.
//
// On any validation or launch failure the error is recorded on `context`
// and the function returns early.
void DnnPoolingOp<T>::Compute(
    OpKernelContext* context,
    perftools::gputools::dnn::PoolingMode pooling_mode,
    const std::vector<int32>& size, const std::vector<int32>& stride,
    Padding padding, TensorFormat data_format, const Tensor& tensor_in,
    const TensorShape& tensor_out_shape) {
  Tensor* tensor_out = nullptr;
  OP_REQUIRES_OK(context,
                 context->allocate_output(0, tensor_out_shape, &tensor_out));

  PoolParameters params{context, size,        stride,
                        padding, data_format, tensor_in.shape()};
  // PoolParameters reports invalid inputs through context->status().
  if (!context->status().ok()) {
    return;
  }

  /// For now, cudnn does not support NHWC format, so we need to convert it
  /// to NCHW before calling cudnn. We need to get rid of this once it is done
  Tensor transformed_input;
  if (data_format == FORMAT_NHWC) {
    OP_REQUIRES_OK(context, context->allocate_temp(
                                DataTypeToEnum<T>::value,
                                ShapeFromFormat(FORMAT_NCHW, tensor_in.shape(),
                                                data_format),
                                &transformed_input));
    functor::NHWCToNCHW<GPUDevice, T>()(context->eigen_device<Device>(),
                                        tensor_in.tensor<T, 4>(),
                                        transformed_input.tensor<T, 4>());
  } else {
    transformed_input = tensor_in;
  }
  Tensor transformed_output;
  if (data_format == FORMAT_NHWC) {
    // cudnn writes into this NCHW scratch tensor; it is transposed back into
    // tensor_out after the launch succeeds.
    OP_REQUIRES_OK(context, context->allocate_temp(
                                DataTypeToEnum<T>::value,
                                ShapeFromFormat(FORMAT_NCHW, tensor_out_shape,
                                                data_format),
                                &transformed_output));
  } else {
    transformed_output = *tensor_out;
  }

  /// Get ready to call cudnn
  perftools::gputools::dnn::PoolingDescriptor pooling_desc;
  pooling_desc.set_pooling_mode(pooling_mode)
      .set_window_height(params.window_rows)
      .set_window_width(params.window_cols)
      .set_vertical_stride(params.row_stride)
      .set_horizontal_stride(params.col_stride)
      .set_vertical_padding(params.pad_rows)
      .set_horizontal_padding(params.pad_cols);

  perftools::gputools::dnn::BatchDescriptor input_desc;
  input_desc.set_count(params.tensor_in_batch)
      .set_height(params.tensor_in_rows)
      .set_width(params.tensor_in_cols)
      .set_feature_map_count(params.depth)
      .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX);

  perftools::gputools::dnn::BatchDescriptor output_desc;
  output_desc.set_count(params.tensor_in_batch)
      .set_height(params.out_height)
      .set_width(params.out_width)
      .set_feature_map_count(params.depth)
      .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX);

  auto input_data = AsDeviceMemory(transformed_input.template flat<T>().data(),
                                   transformed_input.template flat<T>().size());
  auto output_data =
      AsDeviceMemory(transformed_output.template flat<T>().data(),
                     transformed_output.template flat<T>().size());

  auto* stream = context->op_device_context<GPUDeviceContext>()->stream();
  OP_REQUIRES(context, stream, errors::Internal("No GPU stream available."));

  bool status = stream
                    ->ThenPoolForward(pooling_desc, input_desc, input_data,
                                      output_desc, &output_data)
                    .ok();
  // NOTE: the message previously said "PoolBackward", copy-pasted from the
  // gradient op; this is the forward path.
  OP_REQUIRES(context, status,
              errors::Internal("cudnn PoolForward launch failed"));

  if (data_format == FORMAT_NHWC) {
    /// Transform the output data from NCHW back to NHWC
    auto toConstTensor = [](const Tensor& x) -> const Tensor { return x; };
    functor::NCHWToNHWC<GPUDevice, T>()(
        context->eigen_device<Device>(),
        toConstTensor(transformed_output).template tensor<T, 4>(),
        tensor_out->tensor<T, 4>());
  }
}
template <typename T>
void DnnPoolingGradOp<T>::Compute(
OpKernelContext* context,
perftools::gputools::dnn::PoolingMode pooling_mode,
const std::vector<int32>& size, const std::vector<int32>& stride,
Padding padding, const Tensor* tensor_in, const Tensor* tensor_out,
const Tensor& out_backprop, const TensorShape& tensor_in_shape) {
CHECK((pooling_mode == perftools::gputools::dnn::PoolingMode::kMaximum) ||
Padding padding, TensorFormat data_format, const Tensor* tensor_in,
const Tensor* tensor_out, const Tensor& out_backprop,
const TensorShape& tensor_in_shape) {
CHECK((pooling_mode != perftools::gputools::dnn::PoolingMode::kMaximum) ||
(tensor_in && tensor_out))
<< "For MaxPoolGrad, both tensor_in and tensor_out needs to be "
"specified";
Tensor* output = nullptr;
Tensor* input_backprop = nullptr;
OP_REQUIRES_OK(context,
context->allocate_output(0, tensor_in_shape, &output));
context->allocate_output(0, tensor_in_shape, &input_backprop));
PoolParameters params{context, size, stride, padding, tensor_in_shape};
PoolParameters params{context, size, stride,
padding, data_format, tensor_in_shape};
if (!context->status().ok()) {
return;
}
......@@ -150,57 +247,68 @@ void DnnPoolingGradOp<T>::Compute(
/// For now, cudnn does not support NHWC format, so we need to convert it
/// to NCHW before calling cudnn. We need to get rid of this once it is done
Tensor transformed_input;
OP_REQUIRES_OK(context, context->allocate_temp(
DataTypeToEnum<T>::value,
TensorShape({tensor_in_shape.dim_size(0),
tensor_in_shape.dim_size(3),
tensor_in_shape.dim_size(1),
tensor_in_shape.dim_size(2)}),
&transformed_input));
Tensor transformed_input_backprop;
OP_REQUIRES_OK(context, context->allocate_temp(
DataTypeToEnum<T>::value,
TensorShape({tensor_in_shape.dim_size(0),
tensor_in_shape.dim_size(3),
tensor_in_shape.dim_size(1),
tensor_in_shape.dim_size(2)}),
&transformed_input_backprop));
TensorShape transformed_input_shape;
if (data_format == FORMAT_NHWC || !tensor_in) {
transformed_input_shape =
ShapeFromFormat(FORMAT_NCHW, tensor_in_shape, data_format);
OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum<T>::value,
transformed_input_shape,
&transformed_input));
} else {
transformed_input = *tensor_in;
}
Tensor transformed_output;
OP_REQUIRES_OK(
context,
context->allocate_temp(
DataTypeToEnum<T>::value,
TensorShape({out_backprop.dim_size(0), out_backprop.dim_size(3),
out_backprop.dim_size(1), out_backprop.dim_size(2)}),
&transformed_output));
TensorShape transformed_output_shape;
if (data_format == FORMAT_NHWC || !tensor_out) {
transformed_output_shape =
ShapeFromFormat(FORMAT_NCHW, out_backprop.shape(), data_format);
OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum<T>::value,
transformed_output_shape,
&transformed_output));
} else {
transformed_output = *tensor_out;
}
Tensor transformed_input_backprop;
if (data_format == FORMAT_NHWC) {
OP_REQUIRES_OK(context,
context->allocate_temp(DataTypeToEnum<T>::value,
transformed_input_shape,
&transformed_input_backprop));
} else {
transformed_input_backprop = *input_backprop;
}
Tensor transformed_output_backprop;
OP_REQUIRES_OK(
context,
context->allocate_temp(
DataTypeToEnum<T>::value,
TensorShape({out_backprop.dim_size(0), out_backprop.dim_size(3),
out_backprop.dim_size(1), out_backprop.dim_size(2)}),
&transformed_output_backprop));
if (tensor_in) {
// For AvgPoolGrad, the original input tensor is not necessary. However,
// cudnn still requires them to run, although they do not affect the
// results.
functor::NHWCToNCHW<GPUDevice, T>()(context->eigen_device<Device>(),
tensor_in->tensor<T, 4>(),
transformed_input.tensor<T, 4>());
if (data_format == FORMAT_NHWC) {
OP_REQUIRES_OK(context,
context->allocate_temp(DataTypeToEnum<T>::value,
transformed_output_shape,
&transformed_output_backprop));
} else {
transformed_output_backprop = out_backprop;
}
if (tensor_out) {
// For AvgPoolGrad, the original output tensor is not necessary. However,
// cudnn still requires them to run, although they do not affect the
// results.
functor::NHWCToNCHW<GPUDevice, T>()(context->eigen_device<Device>(),
tensor_out->tensor<T, 4>(),
transformed_output.tensor<T, 4>());
if (data_format == FORMAT_NHWC) {
/// Convert the data from NHWC to NCHW if necessary.
if (tensor_in) {
// For AvgPoolGrad, the original input tensor is not necessary. However,
// cudnn still requires them to run, although they do not affect the
// results.
functor::NHWCToNCHW<GPUDevice, T>()(context->eigen_device<Device>(),
tensor_in->tensor<T, 4>(),
transformed_input.tensor<T, 4>());
}
if (tensor_out) {
// For AvgPoolGrad, the original output tensor is not necessary. However,
// cudnn still requires them to run, although they do not affect the
// results.
functor::NHWCToNCHW<GPUDevice, T>()(context->eigen_device<Device>(),
tensor_out->tensor<T, 4>(),
transformed_output.tensor<T, 4>());
}
functor::NHWCToNCHW<GPUDevice, T>()(
context->eigen_device<Device>(), out_backprop.tensor<T, 4>(),
transformed_output_backprop.tensor<T, 4>());
}
functor::NHWCToNCHW<GPUDevice, T>()(
context->eigen_device<Device>(), out_backprop.tensor<T, 4>(),
transformed_output_backprop.tensor<T, 4>());
/// Get ready to call cudnn
perftools::gputools::dnn::PoolingDescriptor pooling_desc;
......@@ -232,10 +340,10 @@ void DnnPoolingGradOp<T>::Compute(
auto orig_input_data =
AsDeviceMemory(transformed_input.template flat<T>().data(),
transformed_input.template flat<T>().size());
auto output_backprop =
auto output_backprop_data =
AsDeviceMemory(transformed_output_backprop.template flat<T>().data(),
transformed_output_backprop.template flat<T>().size());
auto input_backprop =
auto input_backprop_data =
AsDeviceMemory(transformed_input_backprop.template flat<T>().data(),
transformed_input_backprop.template flat<T>().size());
......@@ -243,21 +351,25 @@ void DnnPoolingGradOp<T>::Compute(
OP_REQUIRES(context, stream, errors::Internal("No GPU stream available."));
bool status =
stream->ThenPoolBackward(pooling_desc, orig_input_desc, orig_input_data,
orig_output_desc, orig_output_data,
output_backprop, &input_backprop)
stream
->ThenPoolBackward(pooling_desc, orig_input_desc, orig_input_data,
orig_output_desc, orig_output_data,
output_backprop_data, &input_backprop_data)
.ok();
OP_REQUIRES(context, status,
errors::Internal("cudnn PoolBackward launch failed"));
/// Transform the output data from NCHW back to NHWC
auto toConstTensor = [](const Tensor& x) -> const Tensor { return x; };
functor::NCHWToNHWC<GPUDevice, T>()(
context->eigen_device<Device>(),
toConstTensor(transformed_input_backprop).template tensor<T, 4>(),
output->tensor<T, 4>());
if (data_format == FORMAT_NHWC) {
/// Transform the output data from NCHW back to NHWC.
auto toConstTensor = [](const Tensor& x) -> const Tensor { return x; };
functor::NCHWToNHWC<GPUDevice, T>()(
context->eigen_device<Device>(),
toConstTensor(transformed_input_backprop).template tensor<T, 4>(),
input_backprop->tensor<T, 4>());
}
}
template class DnnPoolingOp<float>;
template class DnnPoolingGradOp<float>;
#endif // GOOGLE_CUDA
......
......@@ -27,6 +27,7 @@ limitations under the License.
#include "tensorflow/core/kernels/maxpooling_op.h"
#include "tensorflow/core/kernels/ops_util.h"
#include "tensorflow/core/util/padding.h"
#include "tensorflow/core/util/tensor_format.h"
namespace tensorflow {
......@@ -37,7 +38,7 @@ struct PoolParameters {
// Updates context->status if there is an invalid input.
PoolParameters(OpKernelContext* context, const std::vector<int32>& ksize,
const std::vector<int32>& stride, Padding padding,
const TensorShape& tensor_in_shape);
TensorFormat data_format, const TensorShape& tensor_in_shape);
// Returns the shape of the output for "forward" pooling operations.
TensorShape forward_output_shape();
......@@ -63,6 +64,8 @@ struct PoolParameters {
int pad_rows;
int pad_cols;
int pad_depth;
TensorFormat data_format;
};
// An implementation of MaxPooling (forward).
......@@ -70,6 +73,17 @@ template <typename Device, typename T>
class MaxPoolingOp : public OpKernel {
public:
explicit MaxPoolingOp(OpKernelConstruction* context) : OpKernel(context) {
string data_format;
auto status = context->GetAttr("data_format", &data_format);
if (status.ok()) {
OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
errors::InvalidArgument("Invalid data format"));
OP_REQUIRES(
context, data_format_ == FORMAT_NHWC,
errors::InvalidArgument("Default MaxPoolingOp only supports NHWC."));
} else {
data_format_ = FORMAT_NHWC;
}
OP_REQUIRES_OK(context, context->GetAttr("ksize", &ksize_));
OP_REQUIRES(context, ksize_.size() == 4,
errors::InvalidArgument("Sliding window ksize field must "
......@@ -86,8 +100,8 @@ class MaxPoolingOp : public OpKernel {
void Compute(OpKernelContext* context) override {
const Tensor& tensor_in = context->input(0);
PoolParameters params{context, ksize_, stride_, padding_,
tensor_in.shape()};
PoolParameters params{context, ksize_, stride_,
padding_, FORMAT_NHWC, tensor_in.shape()};
if (!context->status().ok()) {
return;
}
......@@ -200,6 +214,7 @@ class MaxPoolingOp : public OpKernel {
std::vector<int32> ksize_;
std::vector<int32> stride_;
Padding padding_;
TensorFormat data_format_;
};
template <typename Device, typename T>
......
......@@ -31,9 +31,23 @@ limitations under the License.
#include "tensorflow/core/kernels/ops_util.h"
#include "tensorflow/core/platform/stream_executor.h"
#include "tensorflow/core/util/padding.h"
#include "tensorflow/core/util/tensor_format.h"
namespace tensorflow {
// A helper class that launches the cudnn pooling forward operations.
template <typename T>
class DnnPoolingOp {
public:
typedef GPUDevice Device;
static void Compute(OpKernelContext* context,
perftools::gputools::dnn::PoolingMode pooling_mode,
const std::vector<int32>& size,
const std::vector<int32>& stride, Padding padding,
TensorFormat data_format, const Tensor& tensor_in,
const TensorShape& tensor_out_shape);
};
// A helper class that launches the cudnn pooling backward operations.
// The original input and output tensors are optional for AvgPoolGrad, but
// mandatory for MaxPoolGrad.
......@@ -45,8 +59,8 @@ class DnnPoolingGradOp {
perftools::gputools::dnn::PoolingMode pooling_mode,
const std::vector<int32>& size,
const std::vector<int32>& stride, Padding padding,
const Tensor* tensor_in, const Tensor* tensor_out,
const Tensor& out_backprop,
TensorFormat data_format, const Tensor* tensor_in,
const Tensor* tensor_out, const Tensor& out_backprop,
const TensorShape& tensor_in_shape);
};
......
......@@ -27,6 +27,7 @@ REGISTER_OP("AvgPool")
.Attr("ksize: list(int) >= 4")
.Attr("strides: list(int) >= 4")
.Attr(GetPaddingAttrString())
.Attr(GetConvnetDataFormatAttrString())
.Attr("T: {float, double}")
.Doc(R"doc(
Performs average pooling on the input.
......@@ -38,6 +39,11 @@ value: 4-D with shape `[batch, height, width, channels]`.
ksize: The size of the sliding window for each dimension of `value`.
strides: The stride of the sliding window for each dimension of `value`.
padding: The type of padding algorithm to use.
data_format: Specify the data format of the input and output data. With the
default format "NHWC", the data is stored in the order of:
[batch, in_height, in_width, in_channels].
Alternatively, the format could be "NCHW", the data storage order of:
[batch, in_channels, in_height, in_width].
output: The average pooled output tensor.
)doc");
......@@ -48,6 +54,7 @@ REGISTER_OP("AvgPoolGrad")
.Attr("ksize: list(int) >= 4")
.Attr("strides: list(int) >= 4")
.Attr(GetPaddingAttrString())
.Attr(GetConvnetDataFormatAttrString())
.Attr("T: {float, double}")
.Doc(R"doc(
Computes gradients of the average pooling function.
......@@ -58,6 +65,11 @@ grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t.
ksize: The size of the sliding window for each dimension of the input.
strides: The stride of the sliding window for each dimension of the input.
padding: The type of padding algorithm to use.
data_format: Specify the data format of the input and output data. With the
default format "NHWC", the data is stored in the order of:
[batch, in_height, in_width, in_channels].
Alternatively, the format could be "NCHW", the data storage order of:
[batch, in_channels, in_height, in_width].
output: 4-D. Gradients w.r.t. the input of `avg_pool`.
)doc");
......@@ -344,6 +356,7 @@ REGISTER_OP("MaxPool")
.Attr("ksize: list(int) >= 4")
.Attr("strides: list(int) >= 4")
.Attr(GetPaddingAttrString())
.Attr(GetConvnetDataFormatAttrString())
.Input("input: float")
.Output("output: float")
.Doc(R"doc(
......@@ -353,6 +366,11 @@ ksize: The size of the window for each dimension of the input tensor.
strides: The stride of the sliding window for each dimension of the
input tensor.
padding: The type of padding algorithm to use.
data_format: Specify the data format of the input and output data. With the
default format "NHWC", the data is stored in the order of:
[batch, in_height, in_width, in_channels].
Alternatively, the format could be "NCHW", the data storage order of:
[batch, in_channels, in_height, in_width].
input: 4-D input to pool over.
output: The max pooled output tensor.
)doc");
......@@ -361,6 +379,7 @@ REGISTER_OP("MaxPoolGrad")
.Attr("ksize: list(int) >= 4")
.Attr("strides: list(int) >= 4")
.Attr(GetPaddingAttrString())
.Attr(GetConvnetDataFormatAttrString())
.Input("orig_input: float")
.Input("orig_output: float")
.Input("grad: float")
......@@ -372,6 +391,11 @@ ksize: The size of the window for each dimension of the input tensor.
strides: The stride of the sliding window for each dimension of the
input tensor.
padding: The type of padding algorithm to use.
data_format: Specify the data format of the input and output data. With the
default format "NHWC", the data is stored in the order of:
[batch, in_height, in_width, in_channels].
Alternatively, the format could be "NCHW", the data storage order of:
[batch, in_channels, in_height, in_width].
orig_input: The original input tensor.
orig_output: The original output tensor.
grad: 4-D. Gradients w.r.t. the output of `max_pool`.
......
......@@ -993,6 +993,20 @@ op {
}
}
}
attr {
name: "data_format"
type: "string"
default_value {
s: "NHWC"
}
description: "Specify the data format of the input and output data. With the\ndefault format \"NHWC\", the data is stored in the order of:\n [batch, in_height, in_width, in_channels].\nAlternatively, the format could be \"NCHW\", the data storage order of:\n [batch, in_channels, in_height, in_width]."
allowed_values {
list {
s: "NHWC"
s: "NCHW"
}
}
}
attr {
name: "T"
type: "type"
......@@ -1048,6 +1062,20 @@ op {
}
}
}
attr {
name: "data_format"
type: "string"
default_value {
s: "NHWC"
}
description: "Specify the data format of the input and output data. With the\ndefault format \"NHWC\", the data is stored in the order of:\n [batch, in_height, in_width, in_channels].\nAlternatively, the format could be \"NCHW\", the data storage order of:\n [batch, in_channels, in_height, in_width]."
allowed_values {
list {
s: "NHWC"
s: "NCHW"
}
}
}
attr {
name: "T"
type: "type"
......@@ -4694,6 +4722,20 @@ op {
}
}
}
attr {
name: "data_format"
type: "string"
default_value {
s: "NHWC"
}
description: "Specify the data format of the input and output data. With the\ndefault format \"NHWC\", the data is stored in the order of:\n [batch, in_height, in_width, in_channels].\nAlternatively, the format could be \"NCHW\", the data storage order of:\n [batch, in_channels, in_height, in_width]."
allowed_values {
list {
s: "NHWC"
s: "NCHW"
}
}
}
summary: "Performs max pooling on the input."
}
op {
......@@ -4743,6 +4785,20 @@ op {
}
}
}
attr {
name: "data_format"
type: "string"
default_value {
s: "NHWC"
}
description: "Specify the data format of the input and output data. With the\ndefault format \"NHWC\", the data is stored in the order of:\n [batch, in_height, in_width, in_channels].\nAlternatively, the format could be \"NCHW\", the data storage order of:\n [batch, in_channels, in_height, in_width]."
allowed_values {
list {
s: "NHWC"
s: "NCHW"
}
}
}
summary: "Computes gradients of the maxpooling function."
}
op {
......
......@@ -69,17 +69,6 @@ inline int32 GetTensorDimIndex(TensorFormat format, char dimension) {
}
}
// Builds a 4-D TensorShape laid out according to `format` from the
// canonical batch (N), height (H), width (W) and channel (C) sizes.
inline TensorShape ShapeFromFormat(TensorFormat format, int64 N, int64 H,
                                   int64 W, int64 C) {
  std::vector<int64> dim_sizes(4);
  // Place each canonical dimension at the slot the format dictates.
  dim_sizes[GetTensorDimIndex(format, 'C')] = C;
  dim_sizes[GetTensorDimIndex(format, 'W')] = W;
  dim_sizes[GetTensorDimIndex(format, 'H')] = H;
  dim_sizes[GetTensorDimIndex(format, 'N')] = N;
  return TensorShape(dim_sizes);
}
// Return the given tensor dimension from a tensor. The tensor is interpreted
// using the specified format, and a dimension specification using a char.
inline int64 GetTensorDim(const Tensor& tensor, TensorFormat format,
......@@ -120,6 +109,36 @@ T GetTensorDim(const std::vector<T>& attributes, TensorFormat format,
// Return the string that specifies the data format for convnet operations.
string GetConvnetDataFormatAttrString();
// Builds a 4-D TensorShape laid out according to `format` from the
// canonical batch (N), height (H), width (W) and channel (C) sizes.
inline TensorShape ShapeFromFormat(TensorFormat format, int64 N, int64 H,
                                   int64 W, int64 C) {
  std::vector<int64> dim_sizes(4);
  // Place each canonical dimension at the slot the format dictates.
  dim_sizes[GetTensorDimIndex(format, 'C')] = C;
  dim_sizes[GetTensorDimIndex(format, 'W')] = W;
  dim_sizes[GetTensorDimIndex(format, 'H')] = H;
  dim_sizes[GetTensorDimIndex(format, 'N')] = N;
  return TensorShape(dim_sizes);
}
// Re-expresses `src_shape` (interpreted as `src_format`) as a shape in
// `dst_format`. Returns `src_shape` unchanged when both formats match.
inline TensorShape ShapeFromFormat(TensorFormat dst_format,
                                   const TensorShape& src_shape,
                                   TensorFormat src_format) {
  if (src_format == dst_format) {
    return src_shape;
  }

  std::vector<int64> dim_sizes(4);
  // Copy each canonical dimension from its source slot to its
  // destination slot.
  for (const char dim : {'N', 'H', 'W', 'C'}) {
    dim_sizes[GetTensorDimIndex(dst_format, dim)] =
        GetTensorDim(src_shape, src_format, dim);
  }
  return TensorShape(dim_sizes);
}
} // namespace tensorflow
#endif // TENSORFLOW_UTIL_TENSOR_FORMAT_H_
......@@ -21,9 +21,51 @@ from __future__ import print_function
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import test_util
from tensorflow.python.ops import gen_nn_ops
def NHWCToNCHW(input_tensor):
  """Convert the input from NHWC format to NCHW.

  Args:
    input_tensor: a 4-D tensor, or a 4-element array representing the same.

  Returns:
    the converted tensor or a shape array
  """
  # Index of each NCHW dimension within an NHWC-ordered sequence.
  permutation = [0, 3, 1, 2]
  if isinstance(input_tensor, tf.Tensor):
    return tf.transpose(input_tensor, permutation)
  return [input_tensor[i] for i in permutation]
def NCHWToNHWC(input_tensor):
  """Convert the input from NCHW format to NHWC.

  Args:
    input_tensor: a 4-D tensor, or a 4-element array representing the same.

  Returns:
    the converted tensor or a shape array
  """
  # Index of each NHWC dimension within an NCHW-ordered sequence.
  permutation = [0, 2, 3, 1]
  if isinstance(input_tensor, tf.Tensor):
    return tf.transpose(input_tensor, permutation)
  return [input_tensor[i] for i in permutation]
def GetTestConfigs():
  """Get all the valid tests configs to run.

  Returns:
    all the valid test configs as tuples of data_format and use_gpu.
  """
  configs = [("NHWC", False), ("NHWC", True)]
  if test_util.IsGoogleCudaEnabled():
    # "NCHW" format is not currently supported on CPU.
    configs.append(("NCHW", True))
  return configs
def GetInceptionMaxPoolShapes():
"""Iterator for some of the max pool ops in the Inception 2015 model.
......@@ -47,8 +89,8 @@ def GetInceptionMaxPoolShapes():
class PoolingTest(tf.test.TestCase):
def _VerifyValues(self, pool_func, input_sizes, ksize, strides, padding,
expected, use_gpu):
def _VerifyOneTest(self, pool_func, input_sizes, ksize, strides, padding,
data_format, expected, use_gpu):
"""Verifies the output values of the pooling function.
Args:
......@@ -58,6 +100,7 @@ class PoolingTest(tf.test.TestCase):
ksize: The kernel size dimensions
strides: The stride dimensions
padding: Padding type.
data_format: The data format we use to run the pooling operation.
expected: An array containing the expected operation outputs.
use_gpu: Whether we are running on GPU.
"""
......@@ -69,11 +112,37 @@ class PoolingTest(tf.test.TestCase):
x = [f * 1.0 for f in range(1, total_size + 1)]
with self.test_session(use_gpu=use_gpu) as sess:
t = tf.constant(x, shape=input_sizes)
t = pool_func(t, ksize=ksize, strides=strides, padding=padding)
if data_format == "NCHW":
t = NHWCToNCHW(t)
ksize = NHWCToNCHW(ksize)
strides = NHWCToNCHW(strides)
t = pool_func(t, ksize=ksize, strides=strides, padding=padding,
data_format=data_format)
if data_format == "NCHW":
t = NCHWToNHWC(t)
actual = t.eval()
self.assertAllClose(expected, actual.flatten())
self.assertShapeEqual(actual, t)
def _VerifyValues(self, pool_func, input_sizes, ksize, strides, padding,
expected, use_gpu):
"""Verifies the output values of the pooling function.
Args:
pool_func: Function to be called, co.MaxPool, co.AvgPool,
or the Lua version.
input_sizes: Input tensor dimensions.
ksize: The kernel size dimensions
strides: The stride dimensions
padding: Padding type.
expected: An array containing the expected operation outputs.
use_gpu: Whether we are running on GPU.
"""
for (data_format, use_gpu_2) in GetTestConfigs():
if use_gpu_2 == use_gpu:
self._VerifyOneTest(pool_func, input_sizes, ksize, strides, padding,
data_format, expected, use_gpu)
def _testAvgPoolValidPadding(self, use_gpu):
expected_output = [7.0, 8.0, 9.0]
self._VerifyValues(tf.nn.avg_pool, input_sizes=[1, 3, 3, 3],
......@@ -395,7 +464,7 @@ class PoolingTest(tf.test.TestCase):
def _ConstructAndTestGradient(self, pool_func, input_sizes, output_sizes,
window_rows, window_cols, row_stride,
col_stride, padding, use_gpu,
col_stride, padding, data_format, use_gpu,
x_init_value=None):
"""Verifies the gradients of the avg pooling function.
......@@ -409,6 +478,7 @@ class PoolingTest(tf.test.TestCase):
row_stride: Row Stride.
col_stride: Col Stride.
padding: Padding type.
data_format: Data format.
use_gpu: whether we are running on GPU
x_init_value: Values to be passed to the gradient checker.
"""
......@@ -430,9 +500,19 @@ class PoolingTest(tf.test.TestCase):
dtype=np.float32).reshape(input_sizes)
func_name = "max_pool"
err_margin = 1e-3
t = pool_func(input_tensor, ksize=[1, window_rows, window_rows, 1],
strides=[1, row_stride, col_stride, 1],
padding=padding, name=func_name)
if data_format == "NCHW":
ksize = [1, 1, window_rows, window_rows]
strides = [1, 1, row_stride, col_stride]
t = NHWCToNCHW(input_tensor)
else:
ksize = [1, window_rows, window_rows, 1]
strides = [1, row_stride, col_stride, 1]
t = input_tensor
t = pool_func(t, ksize=ksize, strides=strides, padding=padding,
data_format=data_format, name=func_name)
if data_format == "NCHW":
t = NCHWToNHWC(t)
err = tf.test.compute_gradient_error(input_tensor,
input_sizes,
t,
......@@ -442,64 +522,64 @@ class PoolingTest(tf.test.TestCase):
print("%s gradient error = " % func_name, err)
self.assertLess(err, err_margin)
def _testMaxPoolGradValidPadding1_1(self, use_gpu):
def _testMaxPoolGradValidPadding1_1(self, data_format, use_gpu):
self._ConstructAndTestGradient(
tf.nn.max_pool, input_sizes=[1, 3, 3, 1],
output_sizes=[1, 3, 3, 1], window_rows=1, window_cols=1, row_stride=1,
col_stride=1, padding="VALID", use_gpu=use_gpu)
col_stride=1, padding="VALID", data_format=data_format, use_gpu=use_gpu)
def _testMaxPoolGradValidPadding2_1_6(self, use_gpu):
def _testMaxPoolGradValidPadding2_1_6(self, data_format, use_gpu):
self._ConstructAndTestGradient(
tf.nn.max_pool, input_sizes=[2, 6, 6, 3],
output_sizes=[2, 5, 5, 3], window_rows=2, window_cols=2, row_stride=1,
col_stride=1, padding="VALID", use_gpu=use_gpu)
col_stride=1, padding="VALID", data_format=data_format, use_gpu=use_gpu)
def _testMaxPoolGradValidPadding2_1_7(self, use_gpu):
def _testMaxPoolGradValidPadding2_1_7(self, data_format, use_gpu):
self._ConstructAndTestGradient(
tf.nn.max_pool, input_sizes=[2, 7, 7, 3],
output_sizes=[2, 6, 6, 3], window_rows=2, window_cols=2, row_stride=1,
col_stride=1, padding="VALID", use_gpu=use_gpu)
col_stride=1, padding="VALID", data_format=data_format, use_gpu=use_gpu)
def _testMaxPoolGradValidPadding2_2(self, use_gpu):
def _testMaxPoolGradValidPadding2_2(self, data_format, use_gpu):
self._ConstructAndTestGradient(
tf.nn.max_pool, input_sizes=[2, 2, 2, 3],
output_sizes=[2, 1, 1, 3], window_rows=2, window_cols=2, row_stride=2,
col_stride=2, padding="VALID", use_gpu=use_gpu)
col_stride=2, padding="VALID", data_format=data_format, use_gpu=use_gpu)
def _testMaxPoolGradSamePadding1_1(self, use_gpu):
def _testMaxPoolGradSamePadding1_1(self, data_format, use_gpu):
self._ConstructAndTestGradient(
tf.nn.max_pool, input_sizes=[2, 2, 4, 3],
output_sizes=[2, 2, 4, 3], window_rows=1, window_cols=1, row_stride=1,
col_stride=1, padding="SAME", use_gpu=use_gpu)
col_stride=1, padding="SAME", data_format=data_format, use_gpu=use_gpu)
def _testMaxPoolGradSamePadding2_1(self, use_gpu):
def _testMaxPoolGradSamePadding2_1(self, data_format, use_gpu):
self._ConstructAndTestGradient(
tf.nn.max_pool, input_sizes=[2, 2, 4, 3],
output_sizes=[2, 2, 4, 3], window_rows=2, window_cols=2, row_stride=1,
col_stride=1, padding="SAME", use_gpu=use_gpu)
col_stride=1, padding="SAME", data_format=data_format, use_gpu=use_gpu)
def _testMaxPoolGradSamePadding2_2(self, use_gpu):
def _testMaxPoolGradSamePadding2_2(self, data_format, use_gpu):
self._ConstructAndTestGradient(
tf.nn.max_pool, input_sizes=[2, 2, 4, 3],
output_sizes=[2, 1, 2, 3], window_rows=2, window_cols=2, row_stride=2,
col_stride=2, padding="SAME", use_gpu=use_gpu)
col_stride=2, padding="SAME", data_format=data_format, use_gpu=use_gpu)
def _testMaxPoolGradSamePadding3_1(self, use_gpu):
def _testMaxPoolGradSamePadding3_1(self, data_format, use_gpu):
self._ConstructAndTestGradient(
tf.nn.max_pool, input_sizes=[1, 7, 7, 1],
output_sizes=[1, 7, 7, 1], window_rows=3, window_cols=3, row_stride=1,
col_stride=1, padding="SAME", use_gpu=use_gpu)
col_stride=1, padding="SAME", data_format=data_format, use_gpu=use_gpu)
def testMaxPoolGrad(self):
for use_gpu in True, False:
self._testMaxPoolGradValidPadding1_1(use_gpu=use_gpu)
self._testMaxPoolGradValidPadding2_1_6(use_gpu=use_gpu)
self._testMaxPoolGradValidPadding2_1_7(use_gpu=use_gpu)
self._testMaxPoolGradValidPadding2_2(use_gpu=use_gpu)
self._testMaxPoolGradSamePadding1_1(use_gpu=use_gpu)
self._testMaxPoolGradSamePadding2_1(use_gpu=use_gpu)
self._testMaxPoolGradSamePadding2_2(use_gpu=use_gpu)
self._testMaxPoolGradSamePadding3_1(use_gpu=use_gpu)
for (data_format, use_gpu) in GetTestConfigs():
self._testMaxPoolGradValidPadding1_1(data_format, use_gpu)
self._testMaxPoolGradValidPadding2_1_6(data_format, use_gpu)
self._testMaxPoolGradValidPadding2_1_7(data_format, use_gpu)
self._testMaxPoolGradValidPadding2_2(data_format, use_gpu)
self._testMaxPoolGradSamePadding1_1(data_format, use_gpu)
self._testMaxPoolGradSamePadding2_1(data_format, use_gpu)
self._testMaxPoolGradSamePadding2_2(data_format, use_gpu)
self._testMaxPoolGradSamePadding3_1(data_format, use_gpu)
def _MaxPoolGrad(self, orig_input, orig_output, grad, window_rows,
window_cols, row_stride, col_stride, padding):
......@@ -697,56 +777,56 @@ class PoolingTest(tf.test.TestCase):
self._testMaxPoolGradDirectWithNans2_2()
def testAvgPoolGrad(self):
for use_gpu in False, True:
self._testAvgPoolGradValidPadding1_1(use_gpu)
self._testAvgPoolGradValidPadding2_1(use_gpu)
self._testAvgPoolGradValidPadding2_2(use_gpu)
self._testAvgPoolGradSamePadding1_1(use_gpu)
self._testAvgPoolGradSamePadding2_1(use_gpu)
self._testAvgPoolGradSamePadding2_2(use_gpu)
self._testAvgPoolGradSamePadding3_1(use_gpu)
def _testAvgPoolGradValidPadding1_1(self, use_gpu):
for (data_format, use_gpu) in GetTestConfigs():
self._testAvgPoolGradValidPadding1_1(data_format, use_gpu)
self._testAvgPoolGradValidPadding2_1(data_format, use_gpu)
self._testAvgPoolGradValidPadding2_2(data_format, use_gpu)
self._testAvgPoolGradSamePadding1_1(data_format, use_gpu)
self._testAvgPoolGradSamePadding2_1(data_format, use_gpu)
self._testAvgPoolGradSamePadding2_2(data_format, use_gpu)
self._testAvgPoolGradSamePadding3_1(data_format, use_gpu)
def _testAvgPoolGradValidPadding1_1(self, data_format, use_gpu):
self._ConstructAndTestGradient(
tf.nn.avg_pool, input_sizes=[2, 3, 3, 3],
output_sizes=[2, 3, 3, 3], window_rows=1, window_cols=1, row_stride=1,
col_stride=1, padding="VALID", use_gpu=use_gpu)
col_stride=1, padding="VALID", data_format=data_format, use_gpu=use_gpu)
def _testAvgPoolGradValidPadding2_1(self, use_gpu):
def _testAvgPoolGradValidPadding2_1(self, data_format, use_gpu):
self._ConstructAndTestGradient(
tf.nn.avg_pool, input_sizes=[2, 3, 3, 3],
output_sizes=[2, 2, 2, 3], window_rows=2, window_cols=2, row_stride=1,
col_stride=1, padding="VALID", use_gpu=use_gpu)
col_stride=1, padding="VALID", data_format=data_format, use_gpu=use_gpu)
def _testAvgPoolGradValidPadding2_2(self, use_gpu):
def _testAvgPoolGradValidPadding2_2(self, data_format, use_gpu):
self._ConstructAndTestGradient(
tf.nn.avg_pool, input_sizes=[2, 2, 2, 3],
output_sizes=[2, 1, 1, 3], window_rows=2, window_cols=2, row_stride=2,
col_stride=2, padding="VALID", use_gpu=use_gpu)
col_stride=2, padding="VALID", data_format=data_format, use_gpu=use_gpu)
def _testAvgPoolGradSamePadding1_1(self, use_gpu):
def _testAvgPoolGradSamePadding1_1(self, data_format, use_gpu):
self._ConstructAndTestGradient(
tf.nn.avg_pool, input_sizes=[2, 2, 4, 3],
output_sizes=[2, 2, 4, 3], window_rows=1, window_cols=1, row_stride=1,
col_stride=1, padding="SAME", use_gpu=use_gpu)
col_stride=1, padding="SAME", data_format=data_format, use_gpu=use_gpu)
def _testAvgPoolGradSamePadding2_1(self, use_gpu):
def _testAvgPoolGradSamePadding2_1(self, data_format, use_gpu):
self._ConstructAndTestGradient(
tf.nn.avg_pool, input_sizes=[2, 2, 4, 3],
output_sizes=[2, 2, 4, 3], window_rows=2, window_cols=2, row_stride=1,
col_stride=1, padding="SAME", use_gpu=use_gpu)
col_stride=1, padding="SAME", data_format=data_format, use_gpu=use_gpu)
def _testAvgPoolGradSamePadding2_2(self, use_gpu):
def _testAvgPoolGradSamePadding2_2(self, data_format, use_gpu):
self._ConstructAndTestGradient(
tf.nn.avg_pool, input_sizes=[2, 2, 4, 3],
output_sizes=[2, 1, 2, 3], window_rows=2, window_cols=2, row_stride=2,
col_stride=2, padding="SAME", use_gpu=use_gpu)
col_stride=2, padding="SAME", data_format=data_format, use_gpu=use_gpu)
def _testAvgPoolGradSamePadding3_1(self, use_gpu):
def _testAvgPoolGradSamePadding3_1(self, data_format, use_gpu):
self._ConstructAndTestGradient(
tf.nn.avg_pool, input_sizes=[1, 7, 7, 1],
output_sizes=[1, 7, 7, 1], window_rows=3, window_cols=3, row_stride=1,
col_stride=1, padding="SAME", use_gpu=use_gpu)
col_stride=1, padding="SAME", data_format=data_format, use_gpu=use_gpu)
def testShapeFunctionEdgeCases(self):
# All shapes unknown.
......
......@@ -309,8 +309,22 @@ def avg_pool_shape(op):
the values of the attrs.
"""
input_shape = op.inputs[0].get_shape().with_rank(4)
ksize_b, ksize_r, ksize_c, ksize_d = op.get_attr("ksize")
stride_b, stride_r, stride_c, stride_d = op.get_attr("strides")
try:
data_format = op.get_attr("data_format")
except ValueError:
data_format = None
if data_format == "NCHW":
# Convert input shape to the default NHWC for inference.
input_shape = [input_shape[0], input_shape[2], input_shape[3],
input_shape[1]]
if data_format == "NCHW":
ksize_b, ksize_d, ksize_r, ksize_c = op.get_attr("ksize")
stride_b, stride_d, stride_r, stride_c = op.get_attr("strides")
else:
ksize_b, ksize_r, ksize_c, ksize_d = op.get_attr("ksize")
stride_b, stride_r, stride_c, stride_d = op.get_attr("strides")
batch_size = input_shape[0]
in_rows = input_shape[1]
......@@ -332,7 +346,12 @@ def avg_pool_shape(op):
out_rows, out_cols = get2d_conv_output_size(
in_rows, in_cols, ksize_r, ksize_c, stride_r, stride_c, padding)
return [tensor_shape.TensorShape([batch_size, out_rows, out_cols, depth])]
output_shape = [batch_size, out_rows, out_cols, depth]
if data_format == "NCHW":
# Convert output shape back to NCHW.
output_shape = [output_shape[0], output_shape[3], output_shape[1],
output_shape[2]]
return [tensor_shape.TensorShape(output_shape)]
def max_pool_shape(op):
......@@ -357,8 +376,22 @@ def max_pool_shape(op):
the values of the attrs.
"""
input_shape = op.inputs[0].get_shape().with_rank(4)
ksize_b, ksize_r, ksize_c, ksize_d = op.get_attr("ksize")
stride_b, stride_r, stride_c, stride_d = op.get_attr("strides")
try:
data_format = op.get_attr("data_format")
except ValueError:
data_format = None
if data_format == "NCHW":
# Convert input shape to the default NHWC for inference.
input_shape = [input_shape[0], input_shape[2], input_shape[3],
input_shape[1]]
if data_format == "NCHW":
ksize_b, ksize_d, ksize_r, ksize_c = op.get_attr("ksize")
stride_b, stride_d, stride_r, stride_c = op.get_attr("strides")
else:
ksize_b, ksize_r, ksize_c, ksize_d = op.get_attr("ksize")
stride_b, stride_r, stride_c, stride_d = op.get_attr("strides")
batch_size = input_shape[0]
in_rows = input_shape[1]
......@@ -383,7 +416,7 @@ def max_pool_shape(op):
padding = op.get_attr("padding")
out_rows, out_cols = get2d_conv_output_size(
in_rows, in_cols, ksize_r, ksize_c, stride_r, stride_c, padding)
return [tensor_shape.TensorShape([batch_size, out_rows, out_cols, depth])]
output_shape = [batch_size, out_rows, out_cols, depth]
else:
if depth % ksize_d > 0:
raise ValueError("Depthwise max pooling requires the depth window "
......@@ -391,8 +424,13 @@ def max_pool_shape(op):
if stride_d != ksize_d:
raise ValueError("Depthwise max pooling requires the depth window "
"to equal the depth stride.")
return [tensor_shape.TensorShape([batch_size, in_rows, in_cols, depth //
ksize_d])]
output_shape = [batch_size, in_rows, in_cols, depth // ksize_d]
if data_format == "NCHW":
# Convert output shape back to NCHW.
output_shape = [output_shape[0], output_shape[3], output_shape[1],
output_shape[2]]
return [tensor_shape.TensorShape(output_shape)]
def no_outputs(unused_op):
......
......@@ -196,7 +196,9 @@ def _AvgPoolGrad(op, grad):
return gen_nn_ops._avg_pool_grad(array_ops.shape(op.inputs[0]), grad,
op.get_attr("ksize"),
op.get_attr("strides"),
op.get_attr("padding"))
op.get_attr("padding"),
data_format=op.get_attr("data_format")
)
@ops.RegisterGradient("MaxPool")
......@@ -204,7 +206,9 @@ def _MaxPoolGrad(op, grad):
return gen_nn_ops._max_pool_grad(op.inputs[0], op.outputs[0], grad,
op.get_attr("ksize"),
op.get_attr("strides"),
padding=op.get_attr("padding"))
padding=op.get_attr("padding"),
data_format=op.get_attr("data_format")
)
@ops.RegisterGradient("BatchNormWithGlobalNormalization")
......
......@@ -247,7 +247,7 @@ def _SoftmaxCrossEntropyWithLogitsShape(op):
return [tensor_shape.vector(batch_size.value), input_shape]
def avg_pool(value, ksize, strides, padding, name=None):
def avg_pool(value, ksize, strides, padding, data_format="NHWC", name=None):
"""Performs the average pooling on the input.
Each entry in `output` is the mean of the corresponding size `ksize`
......@@ -262,6 +262,7 @@ def avg_pool(value, ksize, strides, padding, name=None):
The stride of the sliding window for each dimension of the
input tensor.
padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm.
data_format: A string. 'NHWC' and 'NCHW' are supported.
name: Optional name for the operation.
Returns:
......@@ -271,10 +272,11 @@ def avg_pool(value, ksize, strides, padding, name=None):
value = ops.convert_to_tensor(value, name="input")
return gen_nn_ops._avg_pool(value, ksize=ksize, strides=strides,
padding=padding,
data_format=data_format,
name=name)
def max_pool(value, ksize, strides, padding, name=None):
def max_pool(value, ksize, strides, padding, data_format="NHWC", name=None):
"""Performs the max pooling on the input.
Args:
......@@ -285,6 +287,7 @@ def max_pool(value, ksize, strides, padding, name=None):
strides: A list of ints that has length >= 4. The stride of the sliding
window for each dimension of the input tensor.
padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm.
data_format: A string. 'NHWC' and 'NCHW' are supported.
name: Optional name for the operation.
Returns:
......@@ -294,6 +297,7 @@ def max_pool(value, ksize, strides, padding, name=None):
value = ops.convert_to_tensor(value, name="input")
return gen_nn_ops._max_pool(value, ksize=ksize, strides=strides,
padding=padding,
data_format=data_format,
name=name)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册