im2sequence_op.cc 7.2 KB
Newer Older
1
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
G
gongweibao 已提交
2 3 4 5 6 7 8 9 10 11 12 13 14

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

Y
Yi Wang 已提交
15
#include "paddle/fluid/operators/im2sequence_op.h"
16

S
sneaxiy 已提交
17
#include <memory>
18
#include <string>
19
#include <vector>
G
gongweibao 已提交
20 21 22 23

namespace paddle {
namespace operators {

24
class Im2SequenceOp : public framework::OperatorWithKernel {
G
gongweibao 已提交
25 26 27 28 29
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  void InferShape(framework::InferShapeContext* ctx) const override {
30 31
    PADDLE_ENFORCE_EQ(ctx->HasInput("X"),
                      true,
32 33
                      platform::errors::NotFound(
                          "The input 'X' of Im2SequenceOp is not found."));
34 35
    PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"),
                      true,
36 37
                      platform::errors::NotFound(
                          "The output 'Out' of Im2SequenceOp is not found."));
G
gongweibao 已提交
38
    auto in_dim = ctx->GetInputDim("X");
39

40
    PADDLE_ENFORCE_EQ(
41 42
        in_dim.size(),
        4,
43 44 45
        platform::errors::InvalidArgument(
            "The dimesions size of input 'X' in Im2SequenceOp should be 4. But "
            "received dimesions size=[%d], dimesions=[%s].",
46 47
            in_dim.size(),
            in_dim));
L
liuwei1031 已提交
48
    auto img_channels = in_dim[1];
G
gongweibao 已提交
49

50 51 52
    auto kernels = ctx->Attrs().Get<std::vector<int>>("kernels");
    auto strides = ctx->Attrs().Get<std::vector<int>>("strides");
    auto paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
53 54 55
    if (!ctx->IsRuntime()) {
      // set lod level for compile-time
      framework::VarDesc* out_desc =
56
          BOOST_GET(framework::VarDesc*, ctx->GetOutputVarPtrs("Out")[0]);
57 58
      out_desc->SetLoDLevel(1);
    }
59

W
whs 已提交
60 61
    ctx->SetOutputDim("Out",
                      {in_dim[0], img_channels * kernels[0] * kernels[1]});
G
gongweibao 已提交
62 63 64
  }
};

65
class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker {
G
gongweibao 已提交
66
 public:
Y
Yu Yang 已提交
67
  void Make() override {
W
wanghaoshuang 已提交
68
    AddInput("X",
W
wanghaoshuang 已提交
69
             "(Tensor) The input tensor has NCHW format."
W
wanghaoshuang 已提交
70 71 72 73
             "N: batch size"
             "C: channels"
             "H: height"
             "W: width");
74 75 76 77
    AddInput("Y",
             "(Tensor) The input tensor of image real size(H, W)."
             "2-D with shape [batchsize, 2]")
        .AsDispensable();
W
wanghaoshuang 已提交
78
    AddOutput("Out", "(LodTensor) The output data of im2sequence op,");
W
wanghaoshuang 已提交
79 80
    AddAttr<std::vector<int>>("kernels",
                              "(vector<int>), the "
W
wanghaoshuang 已提交
81 82 83 84 85
                              "kernels(kernel_height, kernel_width)");
    AddAttr<std::vector<int>>("strides",
                              "(vector<int> default:{1, 1}), the "
                              "strides(h_stride, w_stride)")
        .SetDefault({1, 1});
W
wanghaoshuang 已提交
86 87 88 89
    AddAttr<std::vector<int>>("paddings",
                              "(vector<int> default:{0, 0, 0, 0}), the "
                              "paddings(up_pad, left_pad, down_pad, right_pad)")
        .SetDefault({0, 0, 0, 0});
90 91 92 93 94 95 96
    AddAttr<std::vector<int>>("out_stride",
                              "the attribute is valid only when input(Y)"
                              "is not NULL.this attribute represents the"
                              "scaling of the pic through the CNN"
                              "(vector<int> dedault:{1,1}),the out_stride"
                              " (out_stride_height, out_stride_width)")
        .SetDefault({1, 1});
G
gongweibao 已提交
97
    AddComment(R"DOC(
W
wanghaoshuang 已提交
98 99 100 101
This op uses kernels to scan images and converts these images to sequences.
After expanding, The number of time steps are output_height * output_width
and the dimension of each time step is kernel_height * kernel_width * channels,
in which:
W
wanghaoshuang 已提交
102 103

output_height =
W
wanghaoshuang 已提交
104
    1 + (padding_height + padding_down + img_height - kernel_height + stride_height - 1) /
W
wanghaoshuang 已提交
105 106
            stride_height;
output_width =
W
wanghaoshuang 已提交
107
    1 + (padding_left + padding+right + img_width - kernel_width + stride_width - 1) /
W
wanghaoshuang 已提交
108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
            stride_width;

This op can be used after convolution neural network, and before recurrent neural network.

Given:

x = [[[[ 6.  2.  1.]
       [ 8.  3.  5.]
       [ 0.  2.  6.]]

      [[ 2.  4.  4.]
       [ 6.  3.  0.]
       [ 6.  4.  7.]]]

     [[[ 6.  7.  1.]
       [ 5.  7.  9.]
       [ 2.  4.  8.]]

      [[ 1.  2.  1.]
       [ 1.  3.  5.]
       [ 9.  0.  8.]]]]
x.dims = {2, 2, 3, 3}

And:

W
wanghaoshuang 已提交
133 134 135
kernels = [2, 2]
strides = [1, 1]
paddings = [0, 0, 0, 0]
W
wanghaoshuang 已提交
136 137 138 139 140 141 142 143 144 145 146

Then:

output.data = [[ 6.  2.  8.  3.  2.  4.  6.  3.]
               [ 2.  1.  3.  5.  4.  4.  3.  0.]
               [ 8.  3.  0.  2.  6.  3.  6.  4.]
               [ 3.  5.  2.  6.  3.  0.  4.  7.]
               [ 6.  7.  5.  7.  1.  2.  1.  3.]
               [ 7.  1.  7.  9.  2.  1.  3.  5.]
               [ 5.  7.  2.  4.  1.  3.  9.  0.]
               [ 7.  9.  4.  8.  3.  5.  0.  8.]]
147
output.dims = {8, 8}
W
wanghaoshuang 已提交
148 149
output.lod = [[0, 4, 8]]

G
gongweibao 已提交
150 151 152 153
)DOC");
  }
};

154
class Im2SequenceGradOp : public framework::OperatorWithKernel {
G
gongweibao 已提交
155 156 157 158
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
G
add gpu  
gongweibao 已提交
159
  void InferShape(framework::InferShapeContext* ctx) const override {
160 161
    PADDLE_ENFORCE_EQ(ctx->HasInput("X"),
                      true,
162 163
                      platform::errors::NotFound(
                          "The input 'X' of Im2SequenceGradOp is not found."));
164 165
    PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")),
                      true,
166 167 168
                      platform::errors::NotFound(
                          "The input %s of Im2SequenceGradOp is not found.",
                          framework::GradVarName("Out")));
G
gongweibao 已提交
169
    ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
G
add gpu  
gongweibao 已提交
170
  }
G
gongweibao 已提交
171 172
};

H
hong 已提交
173 174
// Describes how to build the "im2sequence_grad" op from a forward
// im2sequence op. T is the op description type the maker is instantiated
// with at registration.
template <typename T>
class Im2SequenceGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

 protected:
  // Wires the grad op: forwards "X" and the gradient of "Out" as inputs,
  // exposes the gradient of "X" as output, and copies every forward attribute.
  void Apply(GradOpPtr<T> op) const override {
    const auto out_grad_name = framework::GradVarName("Out");
    const auto x_grad_name = framework::GradVarName("X");

    op->SetType("im2sequence_grad");

    op->SetInput("X", this->Input("X"));
    op->SetInput(out_grad_name, this->OutputGrad("Out"));

    op->SetOutput(x_grad_name, this->InputGrad("X"));

    op->SetAttrMap(this->Attrs());
  }
};

G
gongweibao 已提交
188 189 190 191
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

// Forward op: definition, proto maker, and one grad maker per op description
// type (OpDesc and imperative OpBase).
REGISTER_OPERATOR(im2sequence,
                  ops::Im2SequenceOp,
                  ops::Im2SequenceOpMaker,
                  ops::Im2SequenceGradMaker<paddle::framework::OpDesc>,
                  ops::Im2SequenceGradMaker<paddle::imperative::OpBase>);

// Backward op: shape inference only, no proto maker.
REGISTER_OPERATOR(im2sequence_grad, ops::Im2SequenceGradOp);

// CPU kernels (float only).
REGISTER_OP_CPU_KERNEL(im2sequence,
                       ops::Im2SequenceKernel<phi::CPUContext, float>);
REGISTER_OP_CPU_KERNEL(im2sequence_grad,
                       ops::Im2SequenceGradKernel<phi::CPUContext, float>);

// CUDA kernels (float only).
REGISTER_OP_CUDA_KERNEL(
    im2sequence,
    ops::Im2SequenceKernel<paddle::platform::CUDADeviceContext, float>);
REGISTER_OP_CUDA_KERNEL(
    im2sequence_grad,
    ops::Im2SequenceGradKernel<paddle::platform::CUDADeviceContext, float>);