diff --git a/paddle/function/GemmConvOp.h b/paddle/function/GemmConvOp.h
index 25d2e220bfb938597cb9c5d686b7f65851348d40..f724643f35af909b9557fe2282f23d4c062c8e8e 100644
--- a/paddle/function/GemmConvOp.h
+++ b/paddle/function/GemmConvOp.h
@@ -44,6 +44,7 @@ enum ColFormat { kCFO = 0, kOCF = 1 };
  *            input_channels,
  *            filter_height,
  *            filter_width]
+ * TODO(hedaoyuan): Refactor the arguments of the interface with TensorShape.
  */
 template <ColFormat Format, DeviceType Device, class T>
 class Im2ColFunctor {
diff --git a/paddle/function/ImageExpandOp.cpp b/paddle/function/ImageExpandOp.cpp
index 0c10f30bbd9c129a949b660fb1a2e8122ea18597..4d8c25ffcdafa3dac0d239fa39b28d9714ebf611 100644
--- a/paddle/function/ImageExpandOp.cpp
+++ b/paddle/function/ImageExpandOp.cpp
@@ -70,16 +70,67 @@ public:
   }
 };
 
+template <class T>
+class Col2ImFunctor<kOCF, DEVICE_TYPE_CPU, T> {
+public:
+  void operator()(const T* colData,
+                  int inputChannels,
+                  int inputHeight,
+                  int inputWidth,
+                  int filterHeight,
+                  int filterWidth,
+                  int strideHeight,
+                  int strideWidth,
+                  int paddingHeight,
+                  int paddingWidth,
+                  int outputHeight,
+                  int outputWidth,
+                  T* imData) {
+    for (int outputH = 0; outputH < outputHeight; ++outputH) {
+      for (int outputW = 0; outputW < outputWidth; ++outputW) {
+        for (int channel = 0; channel < inputChannels; ++channel) {
+          for (int filterH = 0; filterH < filterHeight; ++filterH) {
+            for (int filterW = 0; filterW < filterWidth; ++filterW) {
+              int imRowOffset =
+                  outputH * strideHeight + filterH - paddingHeight;
+              int imColOffset = outputW * strideWidth + filterW - paddingWidth;
+              int colDataOffset =
+                  (((outputH * outputWidth + outputW) * inputChannels +
+                    channel) *
+                       filterHeight +
+                   filterH) *
+                      filterWidth +
+                  filterW;
+              if (imRowOffset >= 0 && imRowOffset < inputHeight &&
+                  imColOffset >= 0 && imColOffset < inputWidth) {
+                int imDataOffset =
+                    (channel * inputHeight + imRowOffset) * inputWidth +
+                    imColOffset;
+                imData[imDataOffset] += colData[colDataOffset];
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+};
+
 /*
  * \brief Converts the image data of four dimensions(NCHW) into
- *        a sequence data of three dimensions(NST). Where N is batch size,
- *        S is the length of the sequence after each image is expanded,
- *        T is the size of each time step in the sequence.
+ *        a sequence data of three dimensions(NST) in the forward calculation,
+ *        which is reversed in the backward calculation.
+ *        Where N is batch size, S is the length of the sequence after each
+ *        image is expanded, T is the size of each time step in the sequence.
  *
+ * Arguments in forward function:
  * \param inputs[0] Image data of NCHW format.
  * \param outputs[0] Sequence data of NST format.
+ *
+ * Arguments in backward function:
+ * \param inputs[0] Sequence data of NST format.
+ * \param outputs[0] Image data of NCHW format.
  */
-template <DeviceType Device>
 class ImageExpandFunction : public FunctionBase {
 public:
   void init(const FuncConfig& config) override {
@@ -93,25 +144,27 @@ public:
     numOutputs_ = 1;
   }
 
-  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
-    CHECK_EQ(numInputs_, inputs.size());
-    CHECK_EQ(numOutputs_, outputs.size());
-    const TensorShape& input = inputs[0].shape();
-    const TensorShape& output = outputs[0].shape();
-    // input argument should be 4-dimensional.
-    CHECK_EQ(input.ndims(), (size_t)4);
-    // output argument should be 3-dimensional.
-    CHECK_EQ(output.ndims(), (size_t)3);
-    // The batchSize of the input needs to be equal to
-    // the batchSize of the output.
-    CHECK_EQ(input[0], output[0]);
-
-    size_t batchSize = input[0];
-    size_t inputChannels = input[1];
-    size_t inputHeight = input[2];
-    size_t inputWidth = input[3];
-    size_t seqLength = output[1];
-    size_t stepSize = output[2];
+  virtual void calc(const BufferArgs& inputs, const BufferArgs& outputs) {}
+
+  void check(const TensorShape& image, const TensorShape& sequence) {
+    // image shape should be 4-dimensional.
+    CHECK_EQ(image.ndims(), (size_t)4);
+    // sequence shape should be 3-dimensional.
+    CHECK_EQ(sequence.ndims(), (size_t)3);
+    // The batchSize of the image needs to be equal to
+    // the batchSize of the sequence.
+    CHECK_EQ(image[0], sequence[0]);
+  }
+
+  // Calculate the shape of colData based on the shape of the image
+  // and the shape of the sequence.
+  TensorShape getColShape(const TensorShape& image,
+                          const TensorShape& sequence) {
+    size_t inputChannels = image[1];
+    size_t inputHeight = image[2];
+    size_t inputWidth = image[3];
+    size_t seqLength = sequence[1];
+    size_t stepSize = sequence[2];
     size_t outputHeight =
         1 +
         (inputHeight + 2 * paddingH() - blockH() + strideH() - 1) / strideH();
@@ -121,8 +174,59 @@ public:
     CHECK_EQ(seqLength, outputHeight * outputWidth);
     CHECK_EQ(stepSize, inputChannels * blockH() * blockW());
 
-    real* inputData = inputs[0].data<real>();
-    real* outputData = outputs[0].data<real>();
+    // [output_height, output_width,
+    //  input_channels, filter_height, filter_width]
+    return TensorShape({outputHeight,
+                        outputWidth,
+                        inputChannels,
+                        (size_t)blockH(),
+                        (size_t)blockW()});
+  }
+
+protected:
+  std::vector<size_t> strides_;
+  std::vector<size_t> paddings_;
+  std::vector<size_t> blocks_;
+
+  inline int strideH() const { return strides_[0]; }
+
+  inline int strideW() const { return strides_[1]; }
+
+  inline int paddingH() const { return paddings_[0]; }
+
+  inline int paddingW() const { return paddings_[1]; }
+
+  inline int blockH() const { return blocks_[0]; }
+
+  inline int blockW() const { return blocks_[1]; }
+};
+
+template <DeviceType Device>
+class ImageExpandForward : public ImageExpandFunction {
+public:
+  void init(const FuncConfig& config) override {
+    ImageExpandFunction::init(config);
+  }
+
+  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
+    CHECK_EQ(numInputs_, inputs.size());
+    CHECK_EQ(numOutputs_, outputs.size());
+    const TensorShape& image = inputs[0].shape();
+    const TensorShape& sequence = outputs[0].shape();
+    check(image, sequence);
+
+    TensorShape colShape = getColShape(image, sequence);
+    size_t batchSize = image[0];
+    size_t inputChannels = image[1];
+    size_t inputHeight = image[2];
+    size_t inputWidth = image[3];
+    size_t seqLength = sequence[1];
+    size_t stepSize = sequence[2];
+    size_t outputHeight = colShape[0];
+    size_t outputWidth = colShape[1];
+
+    real* imageData = inputs[0].data<real>();
+    real* seqData = outputs[0].data<real>();
     Im2ColFunctor<kOCF, Device, real> im2col;
     for (size_t i = 0; i < batchSize; i++) {
       // The result of im2col is [output_height, output_width,
@@ -130,7 +234,7 @@ public:
       // reshape into [seqLength, stepSize], where seqLength is equal
       // output_height * output_width, stepSize is equal
      // input_channels * filter_height * filter_width
-      im2col(inputData,
+      im2col(imageData,
             inputChannels,
             inputHeight,
             inputWidth,
@@ -142,30 +246,64 @@ public:
             paddingW(),
             outputHeight,
             outputWidth,
-             outputData);
-      inputData += inputChannels * inputHeight * inputWidth;
-      outputData += seqLength * stepSize;
+             seqData);
+      imageData += inputChannels * inputHeight * inputWidth;
+      seqData += seqLength * stepSize;
     }
   }
+};
 
-protected:
-  std::vector<size_t> strides_;
-  std::vector<size_t> paddings_;
-  std::vector<size_t> blocks_;
-
-  inline int strideH() const { return strides_[0]; }
-
-  inline int strideW() const { return strides_[1]; }
-
-  inline int paddingH() const { return paddings_[0]; }
+template <DeviceType Device>
+class ImageExpandBackward : public ImageExpandFunction {
+public:
+  void init(const FuncConfig& config) override {
+    ImageExpandFunction::init(config);
+  }
 
-  inline int paddingW() const { return paddings_[1]; }
+  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
+    CHECK_EQ(numInputs_, inputs.size());
+    CHECK_EQ(numOutputs_, outputs.size());
+    // Since the implementation of Col2ImFunctor is ADD_TO,
+    // this function only supports ADD_TO mode.
+    CHECK_EQ(outputs[0].getArgType(), ADD_TO);
+    const TensorShape& image = outputs[0].shape();
+    const TensorShape& sequence = inputs[0].shape();
+    check(image, sequence);
 
-  inline int blockH() const { return blocks_[0]; }
+    TensorShape colShape = getColShape(image, sequence);
+    size_t batchSize = image[0];
+    size_t inputChannels = image[1];
+    size_t inputHeight = image[2];
+    size_t inputWidth = image[3];
+    size_t seqLength = sequence[1];
+    size_t stepSize = sequence[2];
+    size_t outputHeight = colShape[0];
+    size_t outputWidth = colShape[1];
 
-  inline int blockW() const { return blocks_[1]; }
+    real* imageData = outputs[0].data<real>();
+    real* seqData = inputs[0].data<real>();
+    Col2ImFunctor<kOCF, Device, real> col2im;
+    for (size_t i = 0; i < batchSize; i++) {
+      col2im(seqData,
+             inputChannels,
+             inputHeight,
+             inputWidth,
+             blockH(),
+             blockW(),
+             strideH(),
+             strideW(),
+             paddingH(),
+             paddingW(),
+             outputHeight,
+             outputWidth,
+             imageData);
+      imageData += inputChannels * inputHeight * inputWidth;
+      seqData += seqLength * stepSize;
+    }
+  }
 };
 
-REGISTER_TYPED_FUNC(ImageExpand, CPU, ImageExpandFunction);
+REGISTER_TYPED_FUNC(ImageExpand, CPU, ImageExpandForward);
+REGISTER_TYPED_FUNC(ImageExpandGrad, CPU, ImageExpandBackward);
 
 }  // namespace paddle
diff --git a/paddle/gserver/layers/BlockExpandLayer.cpp b/paddle/gserver/layers/BlockExpandLayer.cpp
index 9760d39bb4a3c8f94a2ffbd1fbd2ff7438298b9b..c8d0b21c8754d6d9e0643433e73eb2c4e4026267 100644
--- a/paddle/gserver/layers/BlockExpandLayer.cpp
+++ b/paddle/gserver/layers/BlockExpandLayer.cpp
@@ -47,6 +47,12 @@ bool BlockExpandLayer::init(const LayerMap& layerMap,
                        .set("strides", strides)
                        .set("paddings", paddings)
                        .set("blocks", blocks));
+    createFunction(backward_,
+                   "ImageExpandGrad",
+                   FuncConfig()
+                       .set("strides", strides)
+                       .set("paddings", paddings)
+                       .set("blocks", blocks));
   }
 
   return true;
@@ -126,12 +132,12 @@ void BlockExpandLayer::forward(PassType passType) {
   }
   start[batchSize] = batchSize * blockNum;
   if (!useGpu_) {
-    TensorShape inputShape({batchSize, channels_, imgSizeH_, imgSizeW_});
-    TensorShape outputShape({batchSize, blockNum, blockSize});
+    inputShape_ = TensorShape({batchSize, channels_, imgSizeH_, imgSizeW_});
+    outputShape_ = TensorShape({batchSize, blockNum, blockSize});
     BufferArgs inputs;
     BufferArgs outputs;
-    inputs.addArg(*getInputValue(0), inputShape);
-    outputs.addArg(*getOutputValue(), outputShape, ASSIGN_TO);
+    inputs.addArg(*getInputValue(0), inputShape_);
+    outputs.addArg(*getOutputValue(), outputShape_, ASSIGN_TO);
     forward_[0]->calc(inputs, outputs);
   }
 }
@@ -144,41 +150,50 @@ void BlockExpandLayer::backward(const UpdateCallback& callback) {
   if (!preGrad) {
     return;
   }
-  MatrixPtr grad = getOutputGrad();
-  MatrixPtr gradTrans = Matrix::create(blockSize, blockNum, false, useGpu_);
-  size_t batchSize = preGrad->getHeight();
-  CHECK_EQ(batchSize * blockNum, grad->getHeight());
-  CHECK_EQ(blockSize, grad->getWidth());
+  if (useGpu_) {
+    MatrixPtr grad = getOutputGrad();
+    MatrixPtr gradTrans = Matrix::create(blockSize, blockNum, false, useGpu_);
+    size_t batchSize = preGrad->getHeight();
 
-  for (size_t i = 0; i < batchSize; i++) {
-    MatrixPtr gradTmp =
-        Matrix::create(grad->getData() + i * blockNum * blockSize,
-                       blockNum,
-                       blockSize,
-                       false,
-                       useGpu_);
-    gradTmp->transpose(gradTrans, false);
-    MatrixPtr preGradTmp =
-        Matrix::create(preGrad->getData() + i * preGrad->getWidth(),
-                       1,
-                       preGrad->getWidth(),
-                       false,
-                       useGpu_);
-    preGradTmp->convShrink(*gradTrans,
-                           imgSizeH_,
-                           imgSizeW_,
-                           channels_,
-                           blockH_,
-                           blockW_,
-                           strideH_,
-                           strideW_,
-                           paddingH_,
-                           paddingW_,
-                           outputH_,
-                           outputW_,
-                           1.0,
-                           1.0);
+    CHECK_EQ(batchSize * blockNum, grad->getHeight());
+    CHECK_EQ(blockSize, grad->getWidth());
+
+    for (size_t i = 0; i < batchSize; i++) {
+      MatrixPtr gradTmp =
+          Matrix::create(grad->getData() + i * blockNum * blockSize,
+                         blockNum,
+                         blockSize,
+                         false,
+                         useGpu_);
+      gradTmp->transpose(gradTrans, false);
+      MatrixPtr preGradTmp =
+          Matrix::create(preGrad->getData() + i * preGrad->getWidth(),
+                         1,
+                         preGrad->getWidth(),
+                         false,
+                         useGpu_);
+      preGradTmp->convShrink(*gradTrans,
+                             imgSizeH_,
+                             imgSizeW_,
+                             channels_,
+                             blockH_,
+                             blockW_,
+                             strideH_,
+                             strideW_,
+                             paddingH_,
+                             paddingW_,
+                             outputH_,
+                             outputW_,
+                             1.0,
+                             1.0);
+    }
+  } else {
+    BufferArgs inputs;
+    BufferArgs outputs;
+    inputs.addArg(*getOutputGrad(), outputShape_);
+    outputs.addArg(*getInputGrad(0), inputShape_, ADD_TO);
+    backward_[0]->calc(inputs, outputs);
   }
 }
 
diff --git a/paddle/gserver/layers/BlockExpandLayer.h b/paddle/gserver/layers/BlockExpandLayer.h
index 8f347400e60ec84fc1b5fdbc1c911a8768b306d0..edda0e0b630ae8e1d805a39f3e5d4cb50649570b 100644
--- a/paddle/gserver/layers/BlockExpandLayer.h
+++ b/paddle/gserver/layers/BlockExpandLayer.h
@@ -53,6 +53,9 @@ protected:
   /// auxiliary variable, which saves the transposed output value.
   MatrixPtr outVTrans_;
 
+  TensorShape inputShape_;
+  TensorShape outputShape_;
+
 public:
   explicit BlockExpandLayer(const LayerConfig& config) : Layer(config) {}
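
Illustration (not part of the patch): ImageExpandBackward only accepts ADD_TO because Col2ImFunctor accumulates with +=. When the stride is smaller than the block size, neighboring blocks overlap, so several column elements scatter back onto the same image element. The standalone sketch below mirrors the two kOCF loop nests from the patch with plain arrays; the helper names im2colRef/col2imRef and the main() driver are invented for illustration and are not PaddlePaddle API.

#include <cstdio>
#include <vector>

// Mirrors Im2ColFunctor<kOCF, ...>: expand a [channels, height, width] image
// into [outH, outW, channels, filterH, filterW] column data.
void im2colRef(const float* im, float* col, int channels, int height,
               int width, int filterH, int filterW, int stride, int pad,
               int outH, int outW) {
  for (int oh = 0; oh < outH; ++oh) {
    for (int ow = 0; ow < outW; ++ow) {
      for (int c = 0; c < channels; ++c) {
        for (int fh = 0; fh < filterH; ++fh) {
          for (int fw = 0; fw < filterW; ++fw) {
            int row = oh * stride + fh - pad;
            int colPos = ow * stride + fw - pad;
            int dst =
                (((oh * outW + ow) * channels + c) * filterH + fh) * filterW +
                fw;
            // Elements that fall into the padding area read as zero.
            col[dst] =
                (row >= 0 && row < height && colPos >= 0 && colPos < width)
                    ? im[(c * height + row) * width + colPos]
                    : 0.0f;
          }
        }
      }
    }
  }
}

// Mirrors the new Col2ImFunctor: scatter column data back into the image
// buffer, accumulating (the ADD_TO semantics ImageExpandBackward checks for).
void col2imRef(const float* col, float* im, int channels, int height,
               int width, int filterH, int filterW, int stride, int pad,
               int outH, int outW) {
  for (int oh = 0; oh < outH; ++oh) {
    for (int ow = 0; ow < outW; ++ow) {
      for (int c = 0; c < channels; ++c) {
        for (int fh = 0; fh < filterH; ++fh) {
          for (int fw = 0; fw < filterW; ++fw) {
            int row = oh * stride + fh - pad;
            int colPos = ow * stride + fw - pad;
            if (row >= 0 && row < height && colPos >= 0 && colPos < width) {
              int src = (((oh * outW + ow) * channels + c) * filterH + fh) *
                            filterW +
                        fw;
              im[(c * height + row) * width + colPos] += col[src];
            }
          }
        }
      }
    }
  }
}

int main() {
  // One 1-channel 4x4 image, 2x2 blocks, stride 2, no padding:
  // outH = outW = 1 + (4 + 0 - 2 + 2 - 1) / 2 = 2, so blocks do not
  // overlap and col2im(im2col(x)) copies every element back exactly once.
  const int C = 1, H = 4, W = 4, F = 2, S = 2, P = 0, OH = 2, OW = 2;
  std::vector<float> im(C * H * W, 1.0f);
  std::vector<float> col(OH * OW * C * F * F, 0.0f);
  std::vector<float> grad(C * H * W, 0.0f);  // ADD_TO: caller zero-fills
  im2colRef(im.data(), col.data(), C, H, W, F, F, S, P, OH, OW);
  col2imRef(col.data(), grad.data(), C, H, W, F, F, S, P, OH, OW);
  printf("grad[0] = %.1f\n", grad[0]);  // prints 1.0
  return 0;
}

With stride 1 and 2x2 blocks, interior image elements would instead receive four contributions each, which is why BlockExpandLayer::backward registers the input gradient with ADD_TO while the forward output can safely use ASSIGN_TO.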