/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/im2sequence_op.h"
#include <memory>
#include <string>
#include <vector>

namespace paddle {
namespace operators {

class Im2SequenceOp : public framework::OperatorWithKernel {
G
gongweibao 已提交
24 25 26 27 28
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  void InferShape(framework::InferShapeContext* ctx) const override {
G
gongweibao 已提交
29
    PADDLE_ENFORCE(ctx->HasInput("X"),
30
                   "Input(X) of Im2SequenceOp should not be null.");
G
gongweibao 已提交
31
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
32
                   "Output(Out) of Im2SequenceOp op should not be null.");
G
gongweibao 已提交
33
    auto in_dim = ctx->GetInputDim("X");
34

G
gongweibao 已提交
35
    PADDLE_ENFORCE_EQ(in_dim.size(), 4,
W
wanghaoshuang 已提交
36
                      "Input(X) format must be 4D tensor, eg., NCHW.");
W
wanghaoshuang 已提交
37
    int img_channels = in_dim[1];
G
gongweibao 已提交
38

39 40 41 42
    auto kernels = ctx->Attrs().Get<std::vector<int>>("kernels");
    auto strides = ctx->Attrs().Get<std::vector<int>>("strides");
    auto paddings = ctx->Attrs().Get<std::vector<int>>("paddings");

W
whs 已提交
43 44
    ctx->SetOutputDim("Out",
                      {in_dim[0], img_channels * kernels[0] * kernels[1]});
G
gongweibao 已提交
45 46 47
  }
};

class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker {
G
gongweibao 已提交
49
 public:
Y
Yu Yang 已提交
50
  void Make() override {
W
wanghaoshuang 已提交
51
    AddInput("X",
W
wanghaoshuang 已提交
52
             "(Tensor) The input tensor has NCHW format."
W
wanghaoshuang 已提交
53 54 55 56
             "N: batch size"
             "C: channels"
             "H: height"
             "W: width");
57 58 59 60
    AddInput("Y",
             "(Tensor) The input tensor of image real size(H, W)."
             "2-D with shape [batchsize, 2]")
        .AsDispensable();
W
wanghaoshuang 已提交
61
    AddOutput("Out", "(LodTensor) The output data of im2sequence op,");
W
wanghaoshuang 已提交
62 63
    AddAttr<std::vector<int>>("kernels",
                              "(vector<int>), the "
W
wanghaoshuang 已提交
64 65 66 67 68
                              "kernels(kernel_height, kernel_width)");
    AddAttr<std::vector<int>>("strides",
                              "(vector<int> default:{1, 1}), the "
                              "strides(h_stride, w_stride)")
        .SetDefault({1, 1});
W
wanghaoshuang 已提交
69 70 71 72
    AddAttr<std::vector<int>>("paddings",
                              "(vector<int> default:{0, 0, 0, 0}), the "
                              "paddings(up_pad, left_pad, down_pad, right_pad)")
        .SetDefault({0, 0, 0, 0});
73 74 75 76 77 78 79
    AddAttr<std::vector<int>>("out_stride",
                              "the attribute is valid only when input(Y)"
                              "is not NULL.this attribute represents the"
                              "scaling of the pic through the CNN"
                              "(vector<int> dedault:{1,1}),the out_stride"
                              " (out_stride_height, out_stride_width)")
        .SetDefault({1, 1});
G
gongweibao 已提交
80
    AddComment(R"DOC(
W
wanghaoshuang 已提交
81 82 83 84
This op uses kernels to scan images and converts these images to sequences.
After expanding, The number of time steps are output_height * output_width
and the dimension of each time step is kernel_height * kernel_width * channels,
in which:
W
wanghaoshuang 已提交
85 86

output_height =
W
wanghaoshuang 已提交
87
    1 + (padding_height + padding_down + img_height - kernel_height + stride_height - 1) /
W
wanghaoshuang 已提交
88 89
            stride_height;
output_width =
W
wanghaoshuang 已提交
90
    1 + (padding_left + padding+right + img_width - kernel_width + stride_width - 1) /
W
wanghaoshuang 已提交
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
            stride_width;

This op can be used after convolution neural network, and before recurrent neural network.

Given:

x = [[[[ 6.  2.  1.]
       [ 8.  3.  5.]
       [ 0.  2.  6.]]

      [[ 2.  4.  4.]
       [ 6.  3.  0.]
       [ 6.  4.  7.]]]

     [[[ 6.  7.  1.]
       [ 5.  7.  9.]
       [ 2.  4.  8.]]

      [[ 1.  2.  1.]
       [ 1.  3.  5.]
       [ 9.  0.  8.]]]]
x.dims = {2, 2, 3, 3}

And:

W
wanghaoshuang 已提交
116 117 118
kernels = [2, 2]
strides = [1, 1]
paddings = [0, 0, 0, 0]
W
wanghaoshuang 已提交
119 120 121 122 123 124 125 126 127 128 129

Then:

output.data = [[ 6.  2.  8.  3.  2.  4.  6.  3.]
               [ 2.  1.  3.  5.  4.  4.  3.  0.]
               [ 8.  3.  0.  2.  6.  3.  6.  4.]
               [ 3.  5.  2.  6.  3.  0.  4.  7.]
               [ 6.  7.  5.  7.  1.  2.  1.  3.]
               [ 7.  1.  7.  9.  2.  1.  3.  5.]
               [ 5.  7.  2.  4.  1.  3.  9.  0.]
               [ 7.  9.  4.  8.  3.  5.  0.  8.]]
130
output.dims = {8, 8}
W
wanghaoshuang 已提交
131 132
output.lod = [[0, 4, 8]]

G
gongweibao 已提交
133 134 135 136
)DOC");
  }
};

class Im2SequenceGradOp : public framework::OperatorWithKernel {
G
gongweibao 已提交
138 139 140 141
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
G
add gpu  
gongweibao 已提交
142 143 144
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
G
gongweibao 已提交
145 146
                   "Input(Out@GRAD) shouldn't be null.");
    ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
G
add gpu  
gongweibao 已提交
147
  }
G
gongweibao 已提交
148 149
};

// Builds the description of the "im2sequence_grad" op that corresponds to a
// forward im2sequence op.
class Im2SequenceGradDescMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  // Returns a new OpDesc of type "im2sequence_grad" wired as follows:
  //   inputs  : X <- forward X, grad(Out) <- gradient of forward Out
  //   outputs : grad(X) <- gradient of forward X
  //   attrs   : the attribute map returned by Attrs()
  std::unique_ptr<framework::OpDesc> Apply() const override {
    std::unique_ptr<framework::OpDesc> grad_desc(new framework::OpDesc());
    grad_desc->SetType("im2sequence_grad");

    grad_desc->SetInput("X", Input("X"));

    const auto out_grad_slot = framework::GradVarName("Out");
    grad_desc->SetInput(out_grad_slot, OutputGrad("Out"));

    const auto x_grad_slot = framework::GradVarName("X");
    grad_desc->SetOutput(x_grad_slot, InputGrad("X"));

    grad_desc->SetAttrMap(Attrs());
    return grad_desc;
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
Y
Yang Yang 已提交
170
REGISTER_OPERATOR(im2sequence, ops::Im2SequenceOp, ops::Im2SequenceOpMaker,
S
sneaxiy 已提交
171
                  ops::Im2SequenceGradDescMaker);
172
REGISTER_OPERATOR(im2sequence_grad, ops::Im2SequenceGradOp);
G
gongweibao 已提交
173
REGISTER_OP_CPU_KERNEL(
174 175
    im2sequence,
    ops::Im2SequenceKernel<paddle::platform::CPUDeviceContext, float>);
G
gongweibao 已提交
176
REGISTER_OP_CPU_KERNEL(
177 178
    im2sequence_grad,
    ops::Im2SequenceGradKernel<paddle::platform::CPUDeviceContext, float>);