提交 61aa1098 编写于 作者: H hedaoyuan

BlockExpandLayer based on the ImageExpand Function.

上级 48e0f432
...@@ -119,12 +119,17 @@ public: ...@@ -119,12 +119,17 @@ public:
1 + 1 +
(inputWidth + 2 * paddingW() - blockW() + strideW() - 1) / strideW(); (inputWidth + 2 * paddingW() - blockW() + strideW() - 1) / strideW();
CHECK_EQ(seqLength, outputHeight * outputWidth); CHECK_EQ(seqLength, outputHeight * outputWidth);
CHECK_EQ(stepSize, inputChannels * blockH() * blockH()); CHECK_EQ(stepSize, inputChannels * blockH() * blockW());
real* inputData = inputs[0].data<real>(); real* inputData = inputs[0].data<real>();
real* outputData = outputs[0].data<real>(); real* outputData = outputs[0].data<real>();
Im2ColFunctor<kOCF, Device, real> im2col; Im2ColFunctor<kOCF, Device, real> im2col;
for (size_t i = 0; i < batchSize; i++) { for (size_t i = 0; i < batchSize; i++) {
// The result of im2col is [output_height, output_width,
// input_channels, filter_height, filter_width], which is easy to
// reshape into [seqLength, stepSize], where seqLength is equal to
// output_height * output_width and stepSize is equal to
// input_channels * filter_height * filter_width.
im2col(inputData, im2col(inputData,
inputChannels, inputChannels,
inputHeight, inputHeight,
...@@ -161,4 +166,6 @@ protected: ...@@ -161,4 +166,6 @@ protected:
inline int blockW() const { return blocks_[1]; } inline int blockW() const { return blocks_[1]; }
}; };
REGISTER_TYPED_FUNC(ImageExpand, CPU, ImageExpandFunction);
} // namespace paddle } // namespace paddle
...@@ -37,6 +37,18 @@ bool BlockExpandLayer::init(const LayerMap& layerMap, ...@@ -37,6 +37,18 @@ bool BlockExpandLayer::init(const LayerMap& layerMap,
imgSizeH_ = blockConf.img_size_y(); imgSizeH_ = blockConf.img_size_y();
imgSizeW_ = blockConf.img_size_x(); imgSizeW_ = blockConf.img_size_x();
if (!useGpu_) {
std::vector<size_t> strides = {(size_t)strideH_, (size_t)strideW_};
std::vector<size_t> paddings = {(size_t)paddingH_, (size_t)paddingW_};
std::vector<size_t> blocks = {(size_t)blockH_, (size_t)blockW_};
createFunction(forward_,
"ImageExpand",
FuncConfig()
.set("strides", strides)
.set("paddings", paddings)
.set("blocks", blocks));
}
return true; return true;
} }
...@@ -63,10 +75,11 @@ void BlockExpandLayer::forward(PassType passType) { ...@@ -63,10 +75,11 @@ void BlockExpandLayer::forward(PassType passType) {
Layer::forward(passType); Layer::forward(passType);
size_t batchSize = inputLayers_[0]->getOutputValue()->getHeight(); size_t batchSize = inputLayers_[0]->getOutputValue()->getHeight();
size_t blockNum = getBlockNum(); size_t blockNum = getBlockNum();
size_t blockSize = blockH_ * blockW_ * channels_; size_t blockSize = blockH_ * blockW_ * channels_;
resetOutput(blockNum * batchSize, blockSize); resetOutput(blockNum * batchSize, blockSize);
// TODO(hedaoyuan): After completing the GPU version of ImageExpand,
// refactor the following code.
Argument& out = getOutput(); Argument& out = getOutput();
MatrixPtr outV = getOutputValue(); MatrixPtr outV = getOutputValue();
...@@ -78,38 +91,49 @@ void BlockExpandLayer::forward(PassType passType) { ...@@ -78,38 +91,49 @@ void BlockExpandLayer::forward(PassType passType) {
int* start = out.sequenceStartPositions->getMutableData(false); int* start = out.sequenceStartPositions->getMutableData(false);
int* dims = out.cpuSequenceDims->getData(); int* dims = out.cpuSequenceDims->getData();
for (size_t i = 0; i < batchSize; i++) { for (size_t i = 0; i < batchSize; i++) {
outVTrans_->zeroMem(); if (useGpu_) {
/* expand each block as one row */ outVTrans_->zeroMem();
MatrixPtr inputTmp = /* expand each block as one row */
Matrix::create(input->getData() + i * input->getWidth(), MatrixPtr inputTmp =
1, Matrix::create(input->getData() + i * input->getWidth(),
input->getWidth(), 1,
false, input->getWidth(),
useGpu_); false,
outVTrans_->convExpand(*inputTmp, useGpu_);
imgSizeH_, outVTrans_->convExpand(*inputTmp,
imgSizeW_, imgSizeH_,
channels_, imgSizeW_,
blockH_, channels_,
blockW_, blockH_,
strideH_, blockW_,
strideW_, strideH_,
paddingH_, strideW_,
paddingW_, paddingH_,
outputH_, paddingW_,
outputW_); outputH_,
MatrixPtr outVTmp = outputW_);
Matrix::create(outV->getData() + i * blockNum * blockSize, MatrixPtr outVTmp =
blockNum, Matrix::create(outV->getData() + i * blockNum * blockSize,
blockSize, blockNum,
false, blockSize,
useGpu_); false,
outVTrans_->transpose(outVTmp, false); useGpu_);
outVTrans_->transpose(outVTmp, false);
}
start[i] = i * blockNum; start[i] = i * blockNum;
dims[2 * i] = outputH_; dims[2 * i] = outputH_;
dims[2 * i + 1] = outputW_; dims[2 * i + 1] = outputW_;
} }
start[batchSize] = batchSize * blockNum; start[batchSize] = batchSize * blockNum;
if (!useGpu_) {
TensorShape inputShape({batchSize, channels_, imgSizeH_, imgSizeW_});
TensorShape outputShape({batchSize, blockNum, blockSize});
BufferArgs inputs;
BufferArgs outputs;
inputs.addArg(*getInputValue(0), inputShape);
outputs.addArg(*getOutputValue(), outputShape, ASSIGN_TO);
forward_[0]->calc(inputs, outputs);
}
} }
void BlockExpandLayer::backward(const UpdateCallback& callback) { void BlockExpandLayer::backward(const UpdateCallback& callback) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册