Commit e82f1008, authored by W wanghaoshuang

Finish block expand op

1. Add lod to output
2. Fix im2col arguments list
3. Refine code and doc
4. Fix output shape
Parent 25a3d2d7
@@ -32,37 +32,27 @@ class BlockExpandOp : public framework::OperatorWithKernel {
     auto in_dim = ctx->GetInputDim("X");
     PADDLE_ENFORCE_EQ(in_dim.size(), 4,
                       "Input(X) format must be 4D tensor, eg., NCHW.");
-    PADDLE_ENFORCE_GE(in_dim[0], 1, "Input batchsize must >= 1.");
 
-    int block_height = ctx->Attrs().Get<int>("blockHeight");
-    int block_width = ctx->Attrs().Get<int>("blockWidth");
-    int stride_height = ctx->Attrs().Get<int>("strideHeight");
-    int stride_width = ctx->Attrs().Get<int>("strideWidth");
-    int padding_height = ctx->Attrs().Get<int>("paddingHeight");
-    int padding_width = ctx->Attrs().Get<int>("paddingWidth");
+    int block_height = ctx->Attrs().Get<int>("block_height");
+    int block_width = ctx->Attrs().Get<int>("block_width");
+    int stride_height = ctx->Attrs().Get<int>("stride_height");
+    int stride_width = ctx->Attrs().Get<int>("stride_width");
+    int padding_height = ctx->Attrs().Get<int>("padding_height");
+    int padding_width = ctx->Attrs().Get<int>("padding_width");
 
-    int N = in_dim[0];
-    int C = in_dim[1];
+    int batch_size = in_dim[0];
+    int img_channels = in_dim[1];
     int img_height = in_dim[2];
     int img_width = in_dim[3];
 
-    int output_height = 0;
-    int output_width = 0;
-    get_blockexpand_output_shape(img_height, img_width, block_height,
-                                 block_width, stride_height, stride_width,
-                                 padding_height, padding_width, output_height,
-                                 output_width);
+    int output_height = get_output_size(img_height, block_height, stride_height,
+                                        padding_height);
+    int output_width =
+        get_output_size(img_width, block_width, stride_width, padding_width);
 
-    // The result of im2col is [output_height, output_width,
-    // inputChannels, filterHeight, filterWidth], and it is easy to
-    // reshape into [seqLength, stepSize], where seqLength is equal
-    // output_height * output_width, stepSize is equal
-    // input_channels * blockHeight * blockWidth
-    ctx->SetOutputDim(
-        "Out", {N, output_height, output_width, C, block_height, block_width});
-    // ctx->ShareLoD("X", /*->*/ "Out");
+    ctx->SetOutputDim("Out", {batch_size * output_height * output_width,
+                              img_channels * block_height * block_width});
+    // TODO(wanghaoshuang): calculate lod at compile time
   }
 };
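Note: InferShape now flattens Out to a 2-D matrix with one row per sliding-window position over the whole mini-batch, instead of the old 6-D tensor. A minimal numpy-style Python sketch of the computation (function and argument names here are illustrative, not part of the patch):

def block_expand_out_dim(in_dim, block, stride, padding):
    # in_dim is NCHW; block/stride/padding are (height, width) pairs.
    batch_size, img_channels, img_height, img_width = in_dim

    def output_size(img, blk, strd, pad):
        # rounding-up division, same as the C++ get_output_size
        return 1 + (img + 2 * pad - blk + strd - 1) // strd

    output_height = output_size(img_height, block[0], stride[0], padding[0])
    output_width = output_size(img_width, block[1], stride[1], padding[1])
    # old Out: {N, output_height, output_width, C, block_height, block_width}
    # new Out: one row per window position, one block's pixels per row
    return [batch_size * output_height * output_width,
            img_channels * block[0] * block[1]]

print(block_expand_out_dim([2, 2, 3, 3], (2, 2), (1, 1), (0, 0)))  # [8, 8]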
@@ -79,28 +69,69 @@ class BlockExpandOpMaker : public framework::OpProtoAndCheckerMaker {
 W: width
 )DOC");
     AddOutput("Out", "(LodTensor)The output data of block_expand op,");
-    AddAttr<int>("blockHeight", "(int)height of block.");
-    AddAttr<int>("blockWidth", "(int)width of block.");
-    AddAttr<int>("strideHeight", "(int)height of stride.");
-    AddAttr<int>("strideWidth", "(int)width of stride.");
-    AddAttr<int>("paddingHeight", "(int)height of padding.");
-    AddAttr<int>("paddingWidth", "(int)width of padding.");
+    AddAttr<int>("block_height", "(int)height of block.");
+    AddAttr<int>("block_width", "(int)width of block.");
+    AddAttr<int>("stride_height", "(int)height of stride.");
+    AddAttr<int>("stride_width", "(int)width of stride.");
+    AddAttr<int>("padding_height", "(int)height of padding.");
+    AddAttr<int>("padding_width", "(int)width of padding.");
     AddComment(R"DOC(
 Expand feature map to minibatch matrix.
 - matrix height is: output_height * output_width
-- matrix width is: blockHeight * blockWidth * channels
+- matrix width is: block_height * block_width * channels
 
 output_height =
-    1 + (2 * paddingHeight + img_height - blockHeight + strideHeight - 1) /
-    strideHeight;
+    1 + (2 * padding_height + img_height - block_height + stride_height - 1) /
+    stride_height;
 output_width =
-    1 + (2 * paddingWidth + img_width - blockWidth + strideWidth - 1) /
-    strideWidth;
+    1 + (2 * padding_width + img_width - block_width + stride_width - 1) /
+    stride_width;
 
-The expand method is the same with ExpandConvLayer, but saved the transposed
-value. After expanding, The number of time steps are output_height * output_width
-and the dimension of each time step is blockHeight * blockWidth * channels.
-This layer can be used after convolution neural network, and before recurrent neural network.
+After expanding, the number of time steps is output_height * output_width
+and the dimension of each time step is block_height * block_width * channels.
+This op can be used after a convolutional neural network, and before a
+recurrent neural network.
+
+Given:
+
+x = [[[[ 6.  2.  1.]
+       [ 8.  3.  5.]
+       [ 0.  2.  6.]]
+      [[ 2.  4.  4.]
+       [ 6.  3.  0.]
+       [ 6.  4.  7.]]]
+     [[[ 6.  7.  1.]
+       [ 5.  7.  9.]
+       [ 2.  4.  8.]]
+      [[ 1.  2.  1.]
+       [ 1.  3.  5.]
+       [ 9.  0.  8.]]]]
+
+x.dims = {2, 2, 3, 3}
+
+And:
+
+block_height = 2
+block_width = 2
+stride_height = 1
+stride_width = 1
+padding_height = 0
+padding_width = 0
+
+Then:
+
+output.data = [[ 6.  2.  8.  3.  2.  4.  6.  3.]
+               [ 2.  1.  3.  5.  4.  4.  3.  0.]
+               [ 8.  3.  0.  2.  6.  3.  6.  4.]
+               [ 3.  5.  2.  6.  3.  0.  4.  7.]
+               [ 6.  7.  5.  7.  1.  2.  1.  3.]
+               [ 7.  1.  7.  9.  2.  1.  3.  5.]
+               [ 5.  7.  2.  4.  1.  3.  9.  0.]
+               [ 7.  9.  4.  8.  3.  5.  0.  8.]]
+output.dims = {8, 8}
+output.lod = [[0, 4, 8]]
 )DOC");
   }
 };
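Note: the worked example in the DOC block can be spot-checked with plain numpy. The sketch below (illustrative only, not part of the patch) reproduces the first output row, which is the top-left 2x2 block of both channels of the first image:

import numpy as np

x = np.array([[[[6, 2, 1], [8, 3, 5], [0, 2, 6]],
               [[2, 4, 4], [6, 3, 0], [6, 4, 7]]],
              [[[6, 7, 1], [5, 7, 9], [2, 4, 8]],
               [[1, 2, 1], [1, 3, 5], [9, 0, 8]]]], dtype="float32")

# Block at grid position (0, 0) of image 0: all channels, rows 0..1,
# cols 0..1, flattened in [channels, block_height, block_width] order.
row0 = x[0, :, 0:2, 0:2].reshape(-1)
print(row0)  # [ 6.  2.  8.  3.  2.  4.  6.  3.] == output.data[0]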
......
@@ -23,20 +23,9 @@
 namespace paddle {
 namespace operators {
 
-inline void get_blockexpand_output_shape(int img_height, int img_width,
-                                         int block_height, int block_width,
-                                         int stride_height, int stride_width,
-                                         int padding_height, int padding_width,
-                                         int& outputHeight, int& outputWidth) {
-  outputHeight =
-      1 +
-      (img_height + 2 * padding_height - block_height + stride_height - 1) /
-          stride_height;
-  outputWidth =
-      1 +
-      (img_width + 2 * padding_width - block_width + stride_width - 1) /
-          stride_width;
-}
+inline int get_output_size(int img_size, int block_size, int stride,
+                           int padding) {
+  return (1 + (img_size + 2 * padding - block_size + stride - 1) / stride);
+}
 
 template <typename Place, typename T>
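Note: the `+ stride - 1` term turns C++'s truncating integer division into a rounding-up one, so a partial last window still yields an output step. A quick check of the identity in Python (illustrative, not part of the patch):

import math

def get_output_size(img_size, block_size, stride, padding):
    return 1 + (img_size + 2 * padding - block_size + stride - 1) // stride

# Equivalent to 1 + ceil((img + 2 * pad - block) / stride):
assert get_output_size(4, 2, 2, 0) == 1 + math.ceil((4 + 0 - 2) / 2)  # 2
assert get_output_size(5, 2, 2, 0) == 1 + math.ceil((5 + 0 - 2) / 2)  # 3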
@@ -45,40 +34,54 @@ class BlockExpandKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& ctx) const override {
     using namespace framework;
     const Tensor* in = ctx.Input<Tensor>("X");
-    Tensor* out = ctx.Output<Tensor>("Out");
+    LoDTensor* out = ctx.Output<LoDTensor>("Out");
     out->mutable_data<T>(ctx.GetPlace());
 
     auto in_dim = in->dims();
-    int N = in_dim[0];
-    int C = in_dim[1];
+    int batch_size = in_dim[0];
+    int img_channels = in_dim[1];
     int img_height = in_dim[2];
     int img_width = in_dim[3];
 
-    int block_height = ctx.Attr<int>("blockHeight");
-    int block_width = ctx.Attr<int>("blockWidth");
-    int stride_height = ctx.Attr<int>("strideHeight");
-    int stride_width = ctx.Attr<int>("strideWidth");
-    int padding_height = ctx.Attr<int>("paddingHeight");
-    int padding_width = ctx.Attr<int>("paddingWidth");
+    int block_height = ctx.Attr<int>("block_height");
+    int block_width = ctx.Attr<int>("block_width");
+    int stride_height = ctx.Attr<int>("stride_height");
+    int stride_width = ctx.Attr<int>("stride_width");
+    int padding_height = ctx.Attr<int>("padding_height");
+    int padding_width = ctx.Attr<int>("padding_width");
 
-    int outputHeight = 0;
-    int outputWidth = 0;
-
-    get_blockexpand_output_shape(
-        img_height, img_width, block_height, block_width, stride_height,
-        stride_width, padding_height, padding_width, outputHeight, outputWidth);
-
-    std::vector<int> stride({stride_height, stride_width});
-    std::vector<int> padding({padding_height, padding_width});
-
-    for (int i = 0; i < N; i++) {
-      Tensor src = in->Slice(i, i + 1).Resize({C, img_height, img_width});
-      Tensor dst = out->Slice(i, i + 1).Resize(
-          {outputHeight, outputWidth, C, block_height, block_width});
+    int output_height = get_output_size(img_height, block_height, stride_height,
+                                        padding_height);
+    int output_width =
+        get_output_size(img_width, block_width, stride_width, padding_width);
+
+    const std::vector<int> dilations({1, 1});
+    const std::vector<int> strides(
+        {stride_height, stride_width, stride_height, stride_width});
+    const std::vector<int> paddings(
+        {padding_height, padding_width, padding_height, padding_width});
+
+    auto out_dims = out->dims();
+    out->Resize({batch_size, out->numel() / batch_size});
+    for (int i = 0; i < batch_size; i++) {
+      const Tensor src =
+          in->Slice(i, i + 1).Resize({img_channels, img_height, img_width});
+      Tensor dst = out->Slice(i, i + 1).Resize({output_height, output_width,
+                                                img_channels, block_height,
+                                                block_width});
       math::Im2ColFunctor<math::ColFormat::kOCF, Place, T> f;
-      f(ctx.device_context(), src, stride, padding, &dst);
+      f(ctx.device_context(), src, dilations, strides, paddings, &dst);
     }
+    out->Resize(out_dims);
+
+    // Set lod information: every image contributes the same number of steps.
+    // TODO(wanghaoshuang): Move this to InferShape
+    framework::LoD lod(1);
+    for (int i = 0, offset = 0; i < batch_size + 1; ++i) {
+      lod[0].push_back(offset);
+      offset += output_height * output_width;
+    }
+    out->set_lod(lod);
   }
 };
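Note: since every image expands to exactly output_height * output_width time steps, the LoD built in the loop above is just evenly spaced offsets. An equivalent Python sketch (illustrative only):

def build_lod(batch_size, output_height, output_width):
    # One LoD level; entry i is the first output row belonging to image i.
    steps = output_height * output_width
    return [[i * steps for i in range(batch_size + 1)]]

# The DOC example: 2 images, each with a 2x2 grid of window positions.
assert build_lod(2, 2, 2) == [[0, 4, 8]]  # matches output.lod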
@@ -88,7 +91,8 @@ class BlockExpandGradKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& ctx) const override {
     using namespace framework;
     auto* in = ctx.Input<Tensor>("X");
-    auto* d_out = ctx.Input<Tensor>(framework::GradVarName("Out"));
+    Tensor* d_out =
+        const_cast<Tensor*>(ctx.Input<Tensor>(framework::GradVarName("Out")));
     auto* d_x = ctx.Output<Tensor>(GradVarName("X"));
     d_x->mutable_data<T>(ctx.GetPlace());
@@ -96,36 +100,40 @@ class BlockExpandGradKernel : public framework::OpKernel<T> {
     x_v.device(ctx.GetEigenDevice<Place>()) = x_v.constant(0.0);
 
     auto in_dim = in->dims();
-    int N = in_dim[0];
-    int C = in_dim[1];
+    int batch_size = in_dim[0];
+    int img_channels = in_dim[1];
     int img_height = in_dim[2];
     int img_width = in_dim[3];
 
-    int block_height = ctx.Attr<int>("blockHeight");
-    int block_width = ctx.Attr<int>("blockWidth");
-    int stride_height = ctx.Attr<int>("strideHeight");
-    int stride_width = ctx.Attr<int>("strideWidth");
-    int padding_height = ctx.Attr<int>("paddingHeight");
-    int padding_width = ctx.Attr<int>("paddingWidth");
+    int block_height = ctx.Attr<int>("block_height");
+    int block_width = ctx.Attr<int>("block_width");
+    int stride_height = ctx.Attr<int>("stride_height");
+    int stride_width = ctx.Attr<int>("stride_width");
+    int padding_height = ctx.Attr<int>("padding_height");
+    int padding_width = ctx.Attr<int>("padding_width");
 
-    int outputHeight = 0;
-    int outputWidth = 0;
-
-    get_blockexpand_output_shape(
-        img_height, img_width, block_height, block_width, stride_height,
-        stride_width, padding_height, padding_width, outputHeight, outputWidth);
-
-    std::vector<int> stride({stride_height, stride_width});
-    std::vector<int> padding({padding_height, padding_width});
-
-    for (int i = 0; i < N; i++) {
-      Tensor dst = d_x->Slice(i, i + 1).Resize({C, img_height, img_width});
-      Tensor src = d_out->Slice(i, i + 1).Resize(
-          {outputHeight, outputWidth, C, block_height, block_width});
+    int output_height = get_output_size(img_height, block_height, stride_height,
+                                        padding_height);
+    int output_width =
+        get_output_size(img_width, block_width, stride_width, padding_width);
+
+    const std::vector<int> dilations({1, 1});
+    const std::vector<int> strides(
+        {stride_height, stride_width, stride_height, stride_width});
+    const std::vector<int> paddings(
+        {padding_height, padding_width, padding_height, padding_width});
+
+    auto d_out_dims = d_out->dims();
+    d_out->Resize({batch_size, d_out->numel() / batch_size});
+    for (int i = 0; i < batch_size; i++) {
+      Tensor dst =
+          d_x->Slice(i, i + 1).Resize({img_channels, img_height, img_width});
+      const Tensor src = d_out->Slice(i, i + 1).Resize(
+          {output_height, output_width, img_channels, block_height,
+           block_width});
       math::Col2ImFunctor<math::ColFormat::kOCF, Place, T> f;
-      f(ctx.device_context(), dst, stride, padding, &src);
+      f(ctx.device_context(), src, dilations, strides, paddings, &dst);
     }
+    d_out->Resize(d_out_dims);
   }
 };
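Note: Col2Im acts as the adjoint of Im2Col here — each column entry is scattered back to the pixel it was read from, and gradients of overlapping windows accumulate. A minimal numpy sketch of that accumulation (illustrative only; single channel and zero padding assumed, not the library's API):

import numpy as np

def col2im_accumulate(col, img_height, img_width, stride):
    # col: [output_height, output_width, block_height, block_width]
    d_img = np.zeros((img_height, img_width), dtype=col.dtype)
    output_height, output_width, block_height, block_width = col.shape
    for oh in range(output_height):
        for ow in range(output_width):
            h0, w0 = oh * stride[0], ow * stride[1]
            # overlapping windows add their gradients into d_img
            d_img[h0:h0 + block_height, w0:w0 + block_width] += col[oh, ow]
    return d_img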
......
@@ -4,20 +4,20 @@ from op_test import OpTest
 
 def get_output_shape(attrs, x):
-    img_height = x.shape[1]
-    img_width = x.shape[2]
+    img_height = x.shape[2]
+    img_width = x.shape[3]
 
-    padding_height = attrs['paddingHeight']
-    padding_width = attrs['paddingWidth']
-    block_height = attrs['blockHeight']
-    block_width = attrs['blockWidth']
-    stride_height = attrs['strideHeight']
-    stride_width = attrs['strideWidth']
+    padding_height = attrs['padding_height']
+    padding_width = attrs['padding_width']
+    block_height = attrs['block_height']
+    block_width = attrs['block_width']
+    stride_height = attrs['stride_height']
+    stride_width = attrs['stride_width']
 
     output_height = \
         1 + \
         (img_height + 2 * padding_height - block_height + stride_height - 1) / \
-        strideHeight
+        stride_height
 
     output_width = \
         1 + \
@@ -42,10 +42,10 @@ def im2col(attrs, im, col):
     filter_height = col.shape[3]
     filter_width = col.shape[4]
 
-    stride_height = attrs['strideHeight']
-    stride_width = attrs['strideWidth']
-    padding_height = attrs['paddingHeight']
-    padding_width = attrs['paddingWidth']
+    stride_height = attrs['stride_height']
+    stride_width = attrs['stride_width']
+    padding_height = attrs['padding_height']
+    padding_width = attrs['padding_width']
 
     for col_row_idx in range(0, output_height):
         for col_col_idx in range(0, output_width):
@@ -73,83 +73,51 @@ def im2col(attrs, im, col):
                                 im_row_offset][im_col_offset]
 
 
-def col2img(attrs, col, img):
-    """
-    img: {CHW}
-    col:
-        {output_height, outputWidth, inputChannels, filterHeight, filterWidth}
-    """
-    input_channels = im.shape[0]
-    input_height = im.shape[1]
-    input_width = im.shape[2]
-    output_height = col.shape[0]
-    output_width = col.shape[1]
-    filter_height = col.shape[3]
-    filter_width = col.shape[4]
-    stride_height = attrs['strideHeight']
-    stride_width = attrs['strideWidth']
-    padding_height = attrs['paddingHeight']
-    padding_width = attrs['paddingWidth']
-    for col_row_idx in range(0, output_height):
-        for col_col_idx in range(0, output_width):
-            for channel in range(0, input_channels):
-                for filter_row_idx in range(0, filter_height):
-                    for filter_col_idx in range(0, filter_width):
-                        im_row_offset = \
-                            col_row_idx * stride_height + filter_row_idx - padding_height
-                        im_col_offset = \
-                            col_col_idx * stride_width + filter_col_idx - padding_width
-                        if (im_row_offset >= 0 and
-                                im_row_offset < input_height and
-                                im_col_offset >= 0 and
-                                im_col_offset < input_width):
-                            im[channel][im_row_offset][im_col_offset] = \
-                                col[col_row_idx][col_col_idx][channel][filter_row_idx][filter_col_idx]
-
-
-def get_input_data(C, H, W):
-    x = np.random.uniform(0.1, 1, [C, H, W]).astype("float32")
-    for c in range(0, C):
-        for h in range(0, H):
-            for w in range(0, W):
-                #x[c][h][w] = c * H * W + h *W + w
-                x[c][h][w] = 0.2 + 0.01 * (c * H * W + h * W + w)
-    return x
+def block_expand(inputs, attrs):
+    output_height, output_width = get_output_shape(attrs, inputs)
+    img_channels = inputs.shape[1]
+    batch_size = inputs.shape[0]
+    out = np.zeros([
+        batch_size, output_height, output_width, img_channels,
+        attrs['block_height'], attrs['block_width']
+    ]).astype("float32")
+
+    for i in range(len(inputs)):
+        im2col(attrs, inputs[i], out[i])
+
+    out = out.reshape([
+        batch_size * output_height * output_width,
+        img_channels * attrs['block_height'] * attrs['block_width']
+    ])
+    return out
 
 
 class TestBlockExpandOp(OpTest):
-    def setUp(self):
-        C = 3
-        H = 4
-        W = 4
-        x = get_input_data(C, H, W)
-
-        attrs = {
-            'blockHeight': 2,
-            'blockWidth': 2,
-            'strideHeight': 1,
-            'strideWidth': 1,
-            'paddingHeight': 1,
-            'paddingWidth': 1,
-        }
-
-        output_height, output_width = get_output_shape(attrs, x)
-        out = np.random.uniform(0.1, 1, \
-            [output_height, output_width, x.shape[0], \
-             attrs['blockHeight'], attrs['blockWidth']]).astype("float32")
-
-        self.op_type = "block_expand"
-        self.inputs = {'X': x.reshape(1, C, H, W)}
-        self.attrs = attrs
-
-        im2col(attrs, x, out)
-        self.outputs = {
-            'Out': out.reshape(1, output_height, output_width, x.shape[0], \
-                attrs['blockHeight'], attrs['blockWidth'])
-        }
+    def config(self):
+        self.batch_size = 1
+        self.img_channels = 3
+        self.img_height = 4
+        self.img_width = 4
+        self.attrs = {
+            'block_height': 2,
+            'block_width': 2,
+            'stride_height': 1,
+            'stride_width': 1,
+            'padding_height': 1,
+            'padding_width': 1,
+        }
+
+    def setUp(self):
+        self.config()
+        self.op_type = "block_expand"
+        x = np.random.randint(0, 10, [
+            self.batch_size, self.img_channels, self.img_height, self.img_width
+        ]).astype("float32")
+        out = block_expand(x, self.attrs)
+        self.inputs = {'X': x}
+        self.outputs = {'Out': out}
 
     def test_check_output(self):
         self.check_output()
@@ -158,42 +126,52 @@ class TestBlockExpandOp(OpTest):
         self.check_grad(['X'], 'Out')
 
 
-class TestBlockExpandOp2(OpTest):
-    def setUp(self):
-        C = 3
-        H = 4
-        W = 5
-        x = get_input_data(C, H, W)
-
-        attrs = {
-            'blockHeight': 2,
-            'blockWidth': 1,
-            'strideHeight': 2,
-            'strideWidth': 1,
-            'paddingHeight': 2,
-            'paddingWidth': 1,
-        }
-
-        output_height, output_width = get_output_shape(attrs, x)
-        out = np.random.uniform(0.1, 1, \
-            [output_height, output_width, x.shape[0], \
-             attrs['blockHeight'], attrs['blockWidth']]).astype("float32")
-
-        self.op_type = "block_expand"
-        self.inputs = {'X': x.reshape(1, C, H, W)}
-        self.attrs = attrs
-
-        im2col(attrs, x, out)
-        self.outputs = {
-            'Out': out.reshape(1, output_height, output_width, x.shape[0], \
-                attrs['blockHeight'], attrs['blockWidth'])
-        }
-
-    def test_check_output(self):
-        self.check_output()
-
-    def test_check_grad_normal(self):
-        self.check_grad(['X'], 'Out')
+class TestBlockExpandOpCase2(TestBlockExpandOp):
+    def config(self):
+        self.batch_size = 2
+        self.img_channels = 3
+        self.img_height = 4
+        self.img_width = 5
+        self.attrs = {
+            'block_height': 2,
+            'block_width': 1,
+            'stride_height': 2,
+            'stride_width': 1,
+            'padding_height': 2,
+            'padding_width': 1,
+        }
+
+
+class TestBlockExpandOpCase3(TestBlockExpandOp):
+    def config(self):
+        self.batch_size = 3
+        self.img_channels = 1
+        self.img_height = 4
+        self.img_width = 5
+        self.attrs = {
+            'block_height': 2,
+            'block_width': 1,
+            'stride_height': 2,
+            'stride_width': 1,
+            'padding_height': 2,
+            'padding_width': 0,
+        }
+
+
+class TestBlockExpandOpCase4(TestBlockExpandOp):
+    def config(self):
+        self.batch_size = 2
+        self.img_channels = 2
+        self.img_height = 3
+        self.img_width = 3
+        self.attrs = {
+            'block_height': 2,
+            'block_width': 2,
+            'stride_height': 1,
+            'stride_width': 1,
+            'padding_height': 0,
+            'padding_width': 0,
+        }
 
 
 if __name__ == '__main__':
......