diff --git a/paddle/function/ImageExpandOp.cpp b/paddle/function/ImageExpandOp.cpp
index 426b6c8e312e4b5e7a68370ffcc87e152fdabd26..0c10f30bbd9c129a949b660fb1a2e8122ea18597 100644
--- a/paddle/function/ImageExpandOp.cpp
+++ b/paddle/function/ImageExpandOp.cpp
@@ -119,12 +119,17 @@ public:
         1 +
         (inputWidth + 2 * paddingW() - blockW() + strideW() - 1) / strideW();
     CHECK_EQ(seqLength, outputHeight * outputWidth);
-    CHECK_EQ(stepSize, inputChannels * blockH() * blockH());
+    CHECK_EQ(stepSize, inputChannels * blockH() * blockW());
 
     real* inputData = inputs[0].data<real>();
     real* outputData = outputs[0].data<real>();
     Im2ColFunctor im2col;
     for (size_t i = 0; i < batchSize; i++) {
+      // The result of im2col is [output_height, output_width,
+      // input_channels, filter_height, filter_width], and it is easy to
+      // reshape into [seqLength, stepSize], where seqLength is equal to
+      // output_height * output_width and stepSize is equal to
+      // input_channels * filter_height * filter_width.
       im2col(inputData,
              inputChannels,
              inputHeight,
@@ -161,4 +166,6 @@ protected:
   inline int blockW() const { return blocks_[1]; }
 };
 
+REGISTER_TYPED_FUNC(ImageExpand, CPU, ImageExpandFunction);
+
 }  // namespace paddle
diff --git a/paddle/gserver/layers/BlockExpandLayer.cpp b/paddle/gserver/layers/BlockExpandLayer.cpp
index 2bafeb92158c56efe32f90742807f0af07bda5af..9760d39bb4a3c8f94a2ffbd1fbd2ff7438298b9b 100644
--- a/paddle/gserver/layers/BlockExpandLayer.cpp
+++ b/paddle/gserver/layers/BlockExpandLayer.cpp
@@ -37,6 +37,18 @@ bool BlockExpandLayer::init(const LayerMap& layerMap,
   imgSizeH_ = blockConf.img_size_y();
   imgSizeW_ = blockConf.img_size_x();
 
+  if (!useGpu_) {
+    std::vector<size_t> strides = {(size_t)strideH_, (size_t)strideW_};
+    std::vector<size_t> paddings = {(size_t)paddingH_, (size_t)paddingW_};
+    std::vector<size_t> blocks = {(size_t)blockH_, (size_t)blockW_};
+    createFunction(forward_,
+                   "ImageExpand",
+                   FuncConfig()
+                       .set("strides", strides)
+                       .set("paddings", paddings)
+                       .set("blocks", blocks));
+  }
+
   return true;
 }
 
@@ -63,10 +75,11 @@ void BlockExpandLayer::forward(PassType passType) {
   Layer::forward(passType);
 
   size_t batchSize = inputLayers_[0]->getOutputValue()->getHeight();
-
   size_t blockNum = getBlockNum();
   size_t blockSize = blockH_ * blockW_ * channels_;
   resetOutput(blockNum * batchSize, blockSize);
+  // TODO(hedaoyuan): After completing the GPU version of ImageExpand,
+  // refactor the following code.
   Argument& out = getOutput();
   MatrixPtr outV = getOutputValue();
 
@@ -78,38 +91,49 @@ void BlockExpandLayer::forward(PassType passType) {
   int* start = out.sequenceStartPositions->getMutableData(false);
   int* dims = out.cpuSequenceDims->getData();
   for (size_t i = 0; i < batchSize; i++) {
-    outVTrans_->zeroMem();
-    /* expand each block as one row */
-    MatrixPtr inputTmp =
-        Matrix::create(input->getData() + i * input->getWidth(),
-                       1,
-                       input->getWidth(),
-                       false,
-                       useGpu_);
-    outVTrans_->convExpand(*inputTmp,
-                           imgSizeH_,
-                           imgSizeW_,
-                           channels_,
-                           blockH_,
-                           blockW_,
-                           strideH_,
-                           strideW_,
-                           paddingH_,
-                           paddingW_,
-                           outputH_,
-                           outputW_);
-    MatrixPtr outVTmp =
-        Matrix::create(outV->getData() + i * blockNum * blockSize,
-                       blockNum,
-                       blockSize,
-                       false,
-                       useGpu_);
-    outVTrans_->transpose(outVTmp, false);
+    if (useGpu_) {
+      outVTrans_->zeroMem();
+      /* expand each block as one row */
+      MatrixPtr inputTmp =
+          Matrix::create(input->getData() + i * input->getWidth(),
+                         1,
+                         input->getWidth(),
+                         false,
+                         useGpu_);
+      outVTrans_->convExpand(*inputTmp,
+                             imgSizeH_,
+                             imgSizeW_,
+                             channels_,
+                             blockH_,
+                             blockW_,
+                             strideH_,
+                             strideW_,
+                             paddingH_,
+                             paddingW_,
+                             outputH_,
+                             outputW_);
+      MatrixPtr outVTmp =
+          Matrix::create(outV->getData() + i * blockNum * blockSize,
+                         blockNum,
+                         blockSize,
+                         false,
+                         useGpu_);
+      outVTrans_->transpose(outVTmp, false);
+    }
     start[i] = i * blockNum;
     dims[2 * i] = outputH_;
     dims[2 * i + 1] = outputW_;
   }
   start[batchSize] = batchSize * blockNum;
+  if (!useGpu_) {
+    TensorShape inputShape({batchSize, channels_, imgSizeH_, imgSizeW_});
+    TensorShape outputShape({batchSize, blockNum, blockSize});
+    BufferArgs inputs;
+    BufferArgs outputs;
+    inputs.addArg(*getInputValue(0), inputShape);
+    outputs.addArg(*getOutputValue(), outputShape, ASSIGN_TO);
+    forward_[0]->calc(inputs, outputs);
+  }
 }
 
 void BlockExpandLayer::backward(const UpdateCallback& callback) {
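
For reviewers: the comment added in ImageExpandOp.cpp describes the column-buffer layout `[output_height, output_width, input_channels, filter_height, filter_width]`. Below is a minimal standalone sketch (not the PR's actual `Im2ColFunctor`; the function name and signature are illustrative) showing why that ordering lets the result be viewed directly as a `[seqLength, stepSize]` matrix — each `(oh, ow)` position writes one contiguous row.

```cpp
// Illustrative im2col in "OCF" order: output positions outermost, so the
// buffer reinterprets as [seqLength, stepSize] with
// seqLength = outputH * outputW and stepSize = channels * blockH * blockW.
void im2colOCF(const float* in, float* out,
               int channels, int inputH, int inputW,
               int blockH, int blockW,
               int strideH, int strideW,
               int paddingH, int paddingW,
               int outputH, int outputW) {
  for (int oh = 0; oh < outputH; ++oh) {
    for (int ow = 0; ow < outputW; ++ow) {
      for (int c = 0; c < channels; ++c) {
        for (int bh = 0; bh < blockH; ++bh) {
          for (int bw = 0; bw < blockW; ++bw) {
            int ih = oh * strideH + bh - paddingH;  // input row
            int iw = ow * strideW + bw - paddingW;  // input column
            // Row (oh * outputW + ow) of the [seqLength, stepSize] view,
            // column ((c * blockH + bh) * blockW + bw), laid out contiguously.
            int outIdx =
                (((oh * outputW + ow) * channels + c) * blockH + bh) * blockW +
                bw;
            bool inBounds = ih >= 0 && ih < inputH && iw >= 0 && iw < inputW;
            // Padding regions are written as zero, matching zero-padding.
            out[outIdx] =
                inBounds ? in[(c * inputH + ih) * inputW + iw] : 0.0f;
          }
        }
      }
    }
  }
}
```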
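
The `CHECK_EQ` calls rely on the output-size expression visible in the first hunk, a ceil-division written with integer arithmetic. A small worked example with hypothetical numbers (not taken from the PR's tests):

```cpp
#include <cstdio>

int main() {
  // Hypothetical configuration, chosen only to exercise the formula.
  int inputH = 5, inputW = 5;
  int blockH = 3, blockW = 3;
  int strideH = 2, strideW = 2;
  int paddingH = 1, paddingW = 1;
  int channels = 4;

  // Same expression as in ImageExpandOp.cpp.
  int outputH = 1 + (inputH + 2 * paddingH - blockH + strideH - 1) / strideH;
  int outputW = 1 + (inputW + 2 * paddingW - blockW + strideW - 1) / strideW;
  int seqLength = outputH * outputW;          // rows of the im2col result
  int stepSize = channels * blockH * blockW;  // columns of the im2col result

  // Here: outputH = outputW = 1 + (5 + 2 - 3 + 2 - 1) / 2 = 3,
  // so seqLength = 9 and stepSize = 4 * 3 * 3 = 36.
  printf("output: %d x %d, seqLength = %d, stepSize = %d\n",
         outputH, outputW, seqLength, stepSize);
  return 0;
}
```

Note that the second `CHECK_EQ` fix in this diff (`blockH() * blockH()` to `blockH() * blockW()`) is exactly the `stepSize` column count above; the old expression only passed for square blocks.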