im2sequence_op.cc 6.3 KB
Newer Older
1
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
G
gongweibao 已提交
2 3 4 5 6 7 8 9 10 11 12 13 14

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

Y
Yi Wang 已提交
15
#include "paddle/fluid/operators/im2sequence_op.h"
S
sneaxiy 已提交
16
#include <memory>
17
#include <string>
18
#include <vector>
G
gongweibao 已提交
19 20 21 22

namespace paddle {
namespace operators {

23
class Im2SequenceOp : public framework::OperatorWithKernel {
G
gongweibao 已提交
24 25 26 27 28
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  void InferShape(framework::InferShapeContext* ctx) const override {
G
gongweibao 已提交
29
    PADDLE_ENFORCE(ctx->HasInput("X"),
30
                   "Input(X) of Im2SequenceOp should not be null.");
G
gongweibao 已提交
31
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
32
                   "Output(Out) of Im2SequenceOp op should not be null.");
G
gongweibao 已提交
33
    auto in_dim = ctx->GetInputDim("X");
34

G
gongweibao 已提交
35
    PADDLE_ENFORCE_EQ(in_dim.size(), 4,
W
wanghaoshuang 已提交
36
                      "Input(X) format must be 4D tensor, eg., NCHW.");
L
liuwei1031 已提交
37
    auto img_channels = in_dim[1];
G
gongweibao 已提交
38

39 40 41
    auto kernels = ctx->Attrs().Get<std::vector<int>>("kernels");
    auto strides = ctx->Attrs().Get<std::vector<int>>("strides");
    auto paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
42 43 44 45 46 47
    if (!ctx->IsRuntime()) {
      // set lod level for compile-time
      framework::VarDesc* out_desc =
          boost::get<framework::VarDesc*>(ctx->GetOutputVarPtrs("Out")[0]);
      out_desc->SetLoDLevel(1);
    }
48

W
whs 已提交
49 50
    ctx->SetOutputDim("Out",
                      {in_dim[0], img_channels * kernels[0] * kernels[1]});
G
gongweibao 已提交
51 52 53
  }
};

54
class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker {
G
gongweibao 已提交
55
 public:
Y
Yu Yang 已提交
56
  void Make() override {
W
wanghaoshuang 已提交
57
    AddInput("X",
W
wanghaoshuang 已提交
58
             "(Tensor) The input tensor has NCHW format."
W
wanghaoshuang 已提交
59 60 61 62
             "N: batch size"
             "C: channels"
             "H: height"
             "W: width");
63 64 65 66
    AddInput("Y",
             "(Tensor) The input tensor of image real size(H, W)."
             "2-D with shape [batchsize, 2]")
        .AsDispensable();
W
wanghaoshuang 已提交
67
    AddOutput("Out", "(LodTensor) The output data of im2sequence op,");
W
wanghaoshuang 已提交
68 69
    AddAttr<std::vector<int>>("kernels",
                              "(vector<int>), the "
W
wanghaoshuang 已提交
70 71 72 73 74
                              "kernels(kernel_height, kernel_width)");
    AddAttr<std::vector<int>>("strides",
                              "(vector<int> default:{1, 1}), the "
                              "strides(h_stride, w_stride)")
        .SetDefault({1, 1});
W
wanghaoshuang 已提交
75 76 77 78
    AddAttr<std::vector<int>>("paddings",
                              "(vector<int> default:{0, 0, 0, 0}), the "
                              "paddings(up_pad, left_pad, down_pad, right_pad)")
        .SetDefault({0, 0, 0, 0});
79 80 81 82 83 84 85
    AddAttr<std::vector<int>>("out_stride",
                              "the attribute is valid only when input(Y)"
                              "is not NULL.this attribute represents the"
                              "scaling of the pic through the CNN"
                              "(vector<int> dedault:{1,1}),the out_stride"
                              " (out_stride_height, out_stride_width)")
        .SetDefault({1, 1});
G
gongweibao 已提交
86
    AddComment(R"DOC(
W
wanghaoshuang 已提交
87 88 89 90
This op uses kernels to scan images and converts these images to sequences.
After expanding, The number of time steps are output_height * output_width
and the dimension of each time step is kernel_height * kernel_width * channels,
in which:
W
wanghaoshuang 已提交
91 92

output_height =
W
wanghaoshuang 已提交
93
    1 + (padding_height + padding_down + img_height - kernel_height + stride_height - 1) /
W
wanghaoshuang 已提交
94 95
            stride_height;
output_width =
W
wanghaoshuang 已提交
96
    1 + (padding_left + padding+right + img_width - kernel_width + stride_width - 1) /
W
wanghaoshuang 已提交
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
            stride_width;

This op can be used after convolution neural network, and before recurrent neural network.

Given:

x = [[[[ 6.  2.  1.]
       [ 8.  3.  5.]
       [ 0.  2.  6.]]

      [[ 2.  4.  4.]
       [ 6.  3.  0.]
       [ 6.  4.  7.]]]

     [[[ 6.  7.  1.]
       [ 5.  7.  9.]
       [ 2.  4.  8.]]

      [[ 1.  2.  1.]
       [ 1.  3.  5.]
       [ 9.  0.  8.]]]]
x.dims = {2, 2, 3, 3}

And:

W
wanghaoshuang 已提交
122 123 124
kernels = [2, 2]
strides = [1, 1]
paddings = [0, 0, 0, 0]
W
wanghaoshuang 已提交
125 126 127 128 129 130 131 132 133 134 135

Then:

output.data = [[ 6.  2.  8.  3.  2.  4.  6.  3.]
               [ 2.  1.  3.  5.  4.  4.  3.  0.]
               [ 8.  3.  0.  2.  6.  3.  6.  4.]
               [ 3.  5.  2.  6.  3.  0.  4.  7.]
               [ 6.  7.  5.  7.  1.  2.  1.  3.]
               [ 7.  1.  7.  9.  2.  1.  3.  5.]
               [ 5.  7.  2.  4.  1.  3.  9.  0.]
               [ 7.  9.  4.  8.  3.  5.  0.  8.]]
136
output.dims = {8, 8}
W
wanghaoshuang 已提交
137 138
output.lod = [[0, 4, 8]]

G
gongweibao 已提交
139 140 141 142
)DOC");
  }
};

143
class Im2SequenceGradOp : public framework::OperatorWithKernel {
G
gongweibao 已提交
144 145 146 147
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
G
add gpu  
gongweibao 已提交
148 149 150
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
G
gongweibao 已提交
151 152
                   "Input(Out@GRAD) shouldn't be null.");
    ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
G
add gpu  
gongweibao 已提交
153
  }
G
gongweibao 已提交
154 155
};

H
hong 已提交
156 157
// Describes how to build the "im2sequence_grad" op from a forward
// im2sequence op. T is the op representation being built (the registration
// in this file instantiates it with framework::OpDesc and
// imperative::OpBase).
template <typename T>
class Im2SequenceGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

 protected:
  // Wires the grad op: it consumes the forward input X and Out@GRAD, produces
  // X@GRAD, and inherits every attribute of the forward op.
  void Apply(GradOpPtr<T> op) const override {
    const auto out_grad_name = framework::GradVarName("Out");
    const auto x_grad_name = framework::GradVarName("X");

    op->SetType("im2sequence_grad");
    op->SetInput("X", this->Input("X"));
    op->SetInput(out_grad_name, this->OutputGrad("Out"));
    op->SetOutput(x_grad_name, this->InputGrad("X"));
    op->SetAttrMap(this->Attrs());
  }
};

G
gongweibao 已提交
171 172 173 174
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
Y
Yang Yang 已提交
175
REGISTER_OPERATOR(im2sequence, ops::Im2SequenceOp, ops::Im2SequenceOpMaker,
H
hong 已提交
176 177
                  ops::Im2SequenceGradMaker<paddle::framework::OpDesc>,
                  ops::Im2SequenceGradMaker<paddle::imperative::OpBase>);
178
REGISTER_OPERATOR(im2sequence_grad, ops::Im2SequenceGradOp);
G
gongweibao 已提交
179
REGISTER_OP_CPU_KERNEL(
180 181
    im2sequence,
    ops::Im2SequenceKernel<paddle::platform::CPUDeviceContext, float>);
G
gongweibao 已提交
182
REGISTER_OP_CPU_KERNEL(
183 184
    im2sequence_grad,
    ops::Im2SequenceGradKernel<paddle::platform::CPUDeviceContext, float>);