diff --git a/paddle/cuda/include/hl_cnn.h b/paddle/cuda/include/hl_cnn.h index 9f84db72da24b0e678520b077f9cba7ffc2d589a..6b56d9ec8d3daae96aaaa04ed79cb637331e2281 100644 --- a/paddle/cuda/include/hl_cnn.h +++ b/paddle/cuda/include/hl_cnn.h @@ -173,6 +173,96 @@ extern void hl_avgpool_backward(const int frameCnt, real* backGrad, const int outStride); +extern void hl_maxpool3D_forward(const int frameCnt, + const real* inputData, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real* tgtData, + real* maxPoolIdxData, + const int tgtStride); + +extern void hl_maxpool3D_backward(const int frameCnt, + const real* outGrad, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real scaleA, + real scaleB, + real* targetGrad, + real* maxPoolIdxData, + const int outStride); + +extern void hl_avgpool3D_forward(const int frameCnt, + const real* inputData, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real* tgtData, + const int tgtStride); + +extern void hl_avgpool3D_backward(const int frameCnt, + const real* outGrad, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + int paddingD, + int paddingH, + int paddingW, + real scaleA, + real scaleB, + real* backGrad, + const int outStride); + /** * @brief Bilinear interpolation forward. * @@ -275,4 +365,4 @@ extern void hl_maxout_backward(real* inGrad, size_t featLen, size_t groups); -#endif /* HL_CNN_H_ */ +#endif // HL_CNN_H_ diff --git a/paddle/cuda/include/stub/hl_cnn_stub.h b/paddle/cuda/include/stub/hl_cnn_stub.h index 2bbb9fa8dfd5eeac9d55aa67a28ebfbffa2acd46..a76dbf0b6578de0606702ad1af227fbf6e1cd62e 100644 --- a/paddle/cuda/include/stub/hl_cnn_stub.h +++ b/paddle/cuda/include/stub/hl_cnn_stub.h @@ -87,6 +87,96 @@ inline void hl_avgpool_backward(const int frameCnt, real* backGrad, const int outStride) {} +inline void hl_maxpool3D_forward(const int frameCnt, + const real* inputData, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real* tgtData, + real* maxPoolIdxData, + const int tgtStride) {} + +inline void hl_maxpool3D_backward(const int frameCnt, + const real* outGrad, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real scaleA, + real scaleB, + real* targetGrad, + real* maxPoolIdxData, + const int outStride) {} + +inline void hl_avgpool3D_forward(const int frameCnt, + const real* inputData, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real* tgtData, + const int tgtStride) {} + +inline void hl_avgpool3D_backward(const int frameCnt, + const real* outGrad, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real scaleA, + real scaleB, + real* backGrad, + const int outStride) {} + inline void hl_bilinear_forward(const real* inData, const size_t inImgH, const size_t inImgW, diff --git a/paddle/cuda/src/hl_cuda_cnn.cu b/paddle/cuda/src/hl_cuda_cnn.cu index aac19b1ea566ad69f1f7374e393676c8debd9883..9ba3d142617537c0160f6dccb86ddca43ada15a5 100644 --- a/paddle/cuda/src/hl_cuda_cnn.cu +++ b/paddle/cuda/src/hl_cuda_cnn.cu @@ -353,6 +353,433 @@ void hl_avgpool_backward(const int frameCnt, CHECK_SYNC("hl_avgpool_backward failed"); } +__global__ void KeMaxPool3DForward(const int nthreads, + const real* inputData, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int ksizeD, + const int ksizeH, + const int ksizeW, + const int strideD, + const int strideH, + const int strideW, + const int padD, + const int padH, + const int padW, + real* tgtData, + real* maxPoolIdxData, + const int tgtStride) { + for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); + index += blockDim.x * gridDim.x) { + int pw = index % pooledW; + int ph = (index / pooledW) % pooledH; + int pd = (index / pooledW / pooledH) % pooledD; + int c = (index / pooledW / pooledH / pooledD) % channels; + int frameNum = index / pooledW / pooledH / pooledD / channels; + int dstart = pd * strideD - padD; + int hstart = ph * strideH - padH; + int wstart = pw * strideW - padW; + int dend = min(dstart + ksizeD, depth); + int hend = min(hstart + ksizeH, height); + int wend = min(wstart + ksizeW, width); + dstart = max(dstart, 0); + hstart = max(hstart, 0); + wstart = max(wstart, 0); + real maxval = -FLT_MAX; + int maxIdx = -1; + inputData += (frameNum * channels + c) * depth * height * width; + for (int d = dstart; d < dend; ++d) { + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + if (maxval < inputData[(d * height + h) * width + w]) { + maxval = inputData[(d * height + h) * width + w]; + maxIdx = (d * height + h) * width + w; + } + } + } + } + int tgtIndex = + index % (pooledW * pooledH * pooledD * channels) + frameNum * tgtStride; + tgtData[tgtIndex] = maxval; + maxPoolIdxData[tgtIndex] = maxIdx; + } +} + +void hl_maxpool3D_forward(const int frameCnt, + const real* inputData, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int padD, + const int padH, + const int padW, + real* tgtData, + real* maxPoolIdxData, + const int tgtStride) { + int num_kernels = pooledD * pooledH * pooledW * channels * frameCnt; + int blocks = (num_kernels + 1024 - 1) / 1024; + dim3 threads(1024, 1); + dim3 grid(blocks, 1); + + KeMaxPool3DForward<<>>(num_kernels, + inputData, + channels, + depth, + height, + width, + pooledD, + pooledH, + pooledW, + sizeZ, + sizeY, + sizeX, + strideD, + strideH, + strideW, + padD, + padH, + padW, + tgtData, + maxPoolIdxData, + tgtStride); + CHECK_SYNC("hl_maxpool3D_forward failed"); +} + +__global__ void KeMaxPool3DBackward(const int nthreads, + const real* outGrad, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int padD, + const int padH, + const int padW, + real scaleA, + real scaleB, + real* targetGrad, + real* maxPoolIdxData, + const int outStride) { + for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); + index += blockDim.x * gridDim.x) { + int offsetW = index % width; + int offsetH = (index / width) % height; + int offsetD = (index / width / height) % depth; + int offsetC = (index / width / height / depth) % channels; + int frameNum = index / width / height / depth / channels; + + int pdstart = + (offsetD + padD < sizeZ) ? 0 : (offsetD + padD - sizeZ) / strideD + 1; + int phstart = + (offsetH + padH < sizeY) ? 0 : (offsetH + padH - sizeY) / strideH + 1; + int pwstart = + (offsetW + padW < sizeX) ? 0 : (offsetW + padW - sizeX) / strideW + 1; + int pdend = min((offsetD + padD) / strideD + 1, pooledD); + int phend = min((offsetH + padH) / strideH + 1, pooledH); + int pwend = min((offsetW + padW) / strideW + 1, pooledW); + + real gradient = 0; + outGrad += ((frameNum * channels + offsetC) * pooledD * pooledH * pooledW); + maxPoolIdxData += + ((frameNum * channels + offsetC) * pooledD * pooledH * pooledW); + for (int pd = pdstart; pd < pdend; ++pd) { + for (int ph = phstart; ph < phend; ++ph) { + for (int pw = pwstart; pw < pwend; ++pw) { + if (((offsetD * height + offsetH) * width + offsetW) == + maxPoolIdxData[(pd * pooledH + ph) * pooledW + pw]) + gradient += outGrad[(pd * pooledH + ph) * pooledW + pw]; + } + } + } + targetGrad[index] = scaleA * gradient + scaleB * targetGrad[index]; + } +} + +void hl_maxpool3D_backward(const int frameCnt, + const real* outGrad, + const int channels, + const int depth, + const int height, + const int width, + const int outputD, + const int outputH, + const int outputW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real scaleA, + real scaleB, + real* targetGrad, + real* maxPoolIdxData, + const int outStride) { + int num_kernels = depth * height * width * channels * frameCnt; + int blocks = (num_kernels + 1024 - 1) / 1024; + + KeMaxPool3DBackward<<>>(num_kernels, + outGrad, + channels, + depth, + height, + width, + outputD, + outputH, + outputW, + sizeZ, + sizeY, + sizeX, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + scaleA, + scaleB, + targetGrad, + maxPoolIdxData, + outStride); + CHECK_SYNC("hl_maxpool3D_backward"); +} + +__global__ void KeAvgPool3DForward(const int nthreads, + const real* inputData, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int padD, + const int padH, + const int padW, + real* tgtData, + const int tgtStride) { + for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); + index += blockDim.x * gridDim.x) { + int pw = index % pooledW; + int ph = (index / pooledW) % pooledH; + int pd = (index / pooledW / pooledH) % pooledD; + int c = (index / pooledW / pooledH / pooledD) % channels; + int frameNum = index / pooledW / pooledH / pooledD / channels; + int dstart = pd * strideD - padD; + int hstart = ph * strideH - padH; + int wstart = pw * strideW - padW; + int dend = min(dstart + sizeZ, depth + padD); + int hend = min(hstart + sizeY, height + padH); + int wend = min(wstart + sizeX, width + padW); + int pool_size = (dend - dstart) * (hend - hstart) * (wend - wstart); + dstart = max(dstart, 0); + hstart = max(hstart, 0); + wstart = max(wstart, 0); + dend = min(dend, depth); + hend = min(hend, height); + wend = min(wend, width); + + real aveval = 0; + inputData += (frameNum * channels + c) * depth * height * width; + for (int d = dstart; d < dend; ++d) { + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + aveval += inputData[(d * height + h) * width + w]; + } + } + } + int tgtIndex = + index % (pooledW * pooledH * pooledD * channels) + frameNum * tgtStride; + tgtData[tgtIndex] = aveval / pool_size; + } +} + +void hl_avgpool3D_forward(const int frameCnt, + const real* inputData, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int paddingD, + const int paddingH, + const int paddingW, + real* tgtData, + const int tgtStride) { + int num_kernels = pooledD * pooledH * pooledW * channels * frameCnt; + int blocks = (num_kernels + 1024 - 1) / 1024; + KeAvgPool3DForward<<>>(num_kernels, + inputData, + channels, + depth, + height, + width, + pooledD, + pooledH, + pooledW, + sizeZ, + sizeY, + sizeX, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + tgtData, + tgtStride); + CHECK_SYNC("hl_avgpool3D_forward failed"); +} + +__global__ void KeAvgPool3DBackward(const int nthreads, + const real* outGrad, + const int channels, + const int depth, + const int height, + const int width, + const int pooledD, + const int pooledH, + const int pooledW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + const int padD, + const int padH, + const int padW, + real scaleA, + real scaleB, + real* tgtGrad, + const int outStride) { + for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); + index += blockDim.x * gridDim.x) { + int offsetW = index % width + padW; + int offsetH = (index / width) % height + padH; + int offsetD = (index / width / height) % depth + padD; + int offsetC = (index / width / height / depth) % channels; + int frameNum = index / width / height / depth / channels; + + int pdstart = (offsetD < sizeZ) ? 0 : (offsetD - sizeZ) / strideD + 1; + int phstart = (offsetH < sizeY) ? 0 : (offsetH - sizeY) / strideH + 1; + int pwstart = (offsetW < sizeX) ? 0 : (offsetW - sizeX) / strideW + 1; + int pdend = min(offsetD / strideD + 1, pooledD); + int phend = min(offsetH / strideH + 1, pooledH); + int pwend = min(offsetW / strideW + 1, pooledW); + + real gradient = 0; + outGrad += (frameNum * channels + offsetC) * pooledD * pooledH * pooledW; + + for (int pd = pdstart; pd < pdend; ++pd) { + for (int ph = phstart; ph < phend; ++ph) { + for (int pw = pwstart; pw < pwend; ++pw) { + // figure out the pooling size + int dstart = pd * strideD - padD; + int hstart = ph * strideH - padH; + int wstart = pw * strideW - padW; + int dend = min(dstart + sizeZ, depth + padD); + int hend = min(hstart + sizeY, height + padH); + int wend = min(wstart + sizeX, width + padW); + int poolsize = (dend - dstart) * (hend - hstart) * (wend - wstart); + gradient += outGrad[(pd * pooledH + ph) * pooledW + pw] / poolsize; + } + } + } + tgtGrad[index] = scaleA * gradient + scaleB * tgtGrad[index]; + } +} + +void hl_avgpool3D_backward(const int frameCnt, + const real* outGrad, + const int channels, + const int depth, + const int height, + const int width, + const int outputD, + const int outputH, + const int outputW, + const int sizeZ, + const int sizeY, + const int sizeX, + const int strideD, + const int strideH, + const int strideW, + int paddingD, + int paddingH, + int paddingW, + real scaleA, + real scaleB, + real* backGrad, + const int outStride) { + int num_kernels = depth * height * width * channels * frameCnt; + int blocks = (num_kernels + 1024 - 1) / 1024; + + KeAvgPool3DBackward<<>>(num_kernels, + outGrad, + channels, + depth, + height, + width, + outputD, + outputH, + outputW, + sizeZ, + sizeY, + sizeX, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + scaleA, + scaleB, + backGrad, + outStride); + CHECK_SYNC("hl_avgpool3D_backward failed"); +} + __global__ void KeBilinearInterpFw(const real* in, const size_t inImgH, const size_t inImgW, diff --git a/paddle/gserver/layers/Pool3DLayer.cpp b/paddle/gserver/layers/Pool3DLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..199f21adb1a5923b590e4f0e716fc67effb2a2d1 --- /dev/null +++ b/paddle/gserver/layers/Pool3DLayer.cpp @@ -0,0 +1,178 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "Pool3DLayer.h" +#include "PoolProjectionLayer.h" +#include "paddle/utils/Logging.h" + +namespace paddle { + +REGISTER_LAYER(pool3d, Pool3DLayer); + +bool Pool3DLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + Layer::init(layerMap, parameterMap); + + /* the size of inputs for pool-layer is 1 */ + CHECK_EQ(config_.inputs_size(), 1); + + const PoolConfig& conf = config_.inputs(0).pool_conf(); + poolType_ = conf.pool_type(); + channels_ = conf.channels(); + + sizeX_ = conf.size_x(); + sizeY_ = conf.size_y(); + sizeZ_ = conf.size_z(); + + strideW_ = conf.stride(); + strideH_ = conf.stride_y(); + strideD_ = conf.stride_z(); + + imgSizeW_ = conf.img_size(); + imgSizeH_ = conf.img_size_y(); + imgSizeD_ = conf.img_size_z(); + + paddingW_ = conf.padding(); + paddingH_ = conf.padding_y(); + paddingD_ = conf.padding_z(); + + outputW_ = conf.output_x(); + outputH_ = conf.output_y(); + outputD_ = conf.output_z(); + + return true; +} + +size_t Pool3DLayer::getSize() { + CHECK_EQ(inputLayers_.size(), 1UL); + + size_t layerSize = 0; + outputD_ = outputSize(imgSizeD_, sizeZ_, paddingD_, strideD_, false); + outputH_ = outputSize(imgSizeH_, sizeY_, paddingH_, strideH_, false); + outputW_ = outputSize(imgSizeW_, sizeX_, paddingW_, strideW_, false); + + layerSize = outputD_ * outputH_ * outputW_ * channels_; + getOutput().setFrameHeight(outputH_); + getOutput().setFrameWidth(outputW_); + getOutput().setFrameDepth(outputD_); + return layerSize; +} + +void Pool3DLayer::forward(PassType passType) { + Layer::forward(passType); + const MatrixPtr& inMat = inputLayers_[0]->getOutputValue(); + size_t batchSize = inMat->getHeight(); + size_t outWidth = getSize(); + resetOutput(batchSize, outWidth); + Matrix::resizeOrCreate(maxPoolIdx_, batchSize, outWidth, false, useGpu_); + const MatrixPtr outMat = getOutputValue(); + + if (poolType_ == "avg") { + outMat->avgPool3DForward(*inMat, + channels_, + imgSizeD_, + imgSizeH_, + imgSizeW_, + outputD_, + outputH_, + outputW_, + sizeZ_, + sizeY_, + sizeX_, + strideD_, + strideH_, + strideW_, + paddingD_, + paddingH_, + paddingW_); + } else if (poolType_ == "max") { + outMat->maxPool3DForward(*inMat, + *maxPoolIdx_, + channels_, + imgSizeD_, + imgSizeH_, + imgSizeW_, + outputD_, + outputH_, + outputW_, + sizeZ_, + sizeY_, + sizeX_, + strideD_, + strideH_, + strideW_, + paddingD_, + paddingH_, + paddingW_); + } else { + LOG(FATAL) << "Unknown pool type: " << poolType_; + } + forwardActivation(); +} + +void Pool3DLayer::backward(const UpdateCallback& callback) { + backwardActivation(); + + (void)callback; + if (NULL == getInputGrad(0)) return; + MatrixPtr inMat = inputLayers_[0]->getOutputValue(); + MatrixPtr inGradMat = inputLayers_[0]->getOutputGrad(); + MatrixPtr outMat = getOutputValue(); + MatrixPtr outGradMat = getOutputGrad(); + + if (poolType_ == "avg") { + inGradMat->avgPool3DBackward(*outGradMat, + imgSizeD_, + imgSizeH_, + imgSizeW_, + outputD_, + outputH_, + outputW_, + sizeZ_, + sizeY_, + sizeZ_, + strideD_, + strideH_, + strideW_, + paddingD_, + paddingH_, + paddingW_, + 1.0, + 1.0); + } else if (poolType_ == "max") { + inGradMat->maxPool3DBackward(*outGradMat, + *maxPoolIdx_, + imgSizeD_, + imgSizeH_, + imgSizeW_, + outputD_, + outputH_, + outputW_, + sizeZ_, + sizeY_, + sizeZ_, + strideD_, + strideH_, + strideW_, + paddingD_, + paddingH_, + paddingW_, + 1.0, + 1.0); + } else { + LOG(FATAL) << "Unknown pool type: " << poolType_; + } +} + +} // namespace paddle diff --git a/paddle/gserver/layers/Pool3DLayer.h b/paddle/gserver/layers/Pool3DLayer.h new file mode 100644 index 0000000000000000000000000000000000000000..8329a02f571bf3b5422134c756c248f77fd517b1 --- /dev/null +++ b/paddle/gserver/layers/Pool3DLayer.h @@ -0,0 +1,49 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "Layer.h" +#include "paddle/math/MathUtils.h" +#include "paddle/math/Matrix.h" + +namespace paddle { + +/** + * @brief Basic parent layer of pooling + * Pools the input within regions + */ +class Pool3DLayer : public Layer { +public: + explicit Pool3DLayer(const LayerConfig& config) : Layer(config) {} + ~Pool3DLayer() {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override; + size_t getSize(); + +protected: + int channels_; + int sizeX_, sizeY_, sizeZ_; + int strideW_, strideH_, strideD_; + int paddingW_, paddingH_, paddingD_; + int imgSizeW_, imgSizeH_, imgSizeD_; + int outputW_, outputH_, outputD_; + std::string poolType_; + MatrixPtr maxPoolIdx_; +}; +} // namespace paddle diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index c54f3ef965f4bed730cf5e0e82130b0416cb34c7..a831ffbc73fbd6ad42fa31b2d6d583718474e59b 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1246,6 +1246,75 @@ TEST(Layer, PoolLayer) { #endif } +void setPool3DConfig(TestConfig* config, + PoolConfig* pool, + const string& poolType) { + // filter size + const int NUM_FILTERS = 16; + const int FILTER_SIZE = 3; + const int FILTER_SIZE_Y = 3; + const int FILTER_SIZE_Z = 3; + const int CHANNELS = 16; + + (*config).biasSize = 0; + (*config).layerConfig.set_type("pool3d"); + (*config).layerConfig.set_num_filters(NUM_FILTERS); + + int kw = FILTER_SIZE, kh = FILTER_SIZE_Y, kd = FILTER_SIZE_Z; + int pw = 0, ph = 0, pd = 0; + int sw = 2, sh = 2, sd = 2; + + pool->set_pool_type(poolType); + pool->set_pool_type("avg"); + pool->set_channels(CHANNELS); + pool->set_size_x(kw); + pool->set_size_y(kh); + pool->set_size_z(kd); + pool->set_padding(0); + pool->set_padding_y(0); + pool->set_padding_z(0); + pool->set_stride(sw); + pool->set_stride_y(sh); + pool->set_stride_z(sd); + pool->set_start(0); + int ow = outputSize(pool->img_size(), kw, pw, sw, /* caffeMode */ false); + int oh = outputSize(pool->img_size_y(), kh, ph, sh, /* caffeMode */ false); + int od = outputSize(pool->img_size_z(), kd, pd, sd, /* caffeMode */ false); + pool->set_output_x(ow); + pool->set_output_y(oh); + pool->set_output_z(od); +} + +void testPool3DLayer(const string& poolType, bool trans, bool useGpu) { + TestConfig config; + config.inputDefs.push_back({INPUT_DATA, "layer_0", 11664, 0}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + PoolConfig* pool = input->mutable_pool_conf(); + + const int IMAGE_SIZE = 9; + const int IMAGE_SIZE_Y = 9; + const int IMAGE_SIZE_Z = 9; + + pool->set_img_size(IMAGE_SIZE); + pool->set_img_size_y(IMAGE_SIZE_Y); + pool->set_img_size_z(IMAGE_SIZE_Z); + + setPool3DConfig(&config, pool, poolType); + config.layerConfig.set_size(pool->output_x() * pool->output_y() * + pool->channels()); + + testLayerGrad(config, "pool3d", 100, trans, useGpu); +} + +TEST(Layer, Pool3DLayer) { + testPool3DLayer("avg", /* trans= */ false, /* useGpu= */ false); + testPool3DLayer("max", /* trans= */ false, /* useGpu= */ false); +#ifndef PADDLE_ONLY_CPU + testPool3DLayer("avg", /* trans= */ false, /* useGpu= */ true); + testPool3DLayer("max", /* trans= */ false, /* useGpu= */ true); +#endif +} + void testSppLayer(const string& poolType, const int pyramidHeight, bool trans, diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 579a0f3cf32b803c1d3ac2af57517ad6490f31ef..8bc42571f7c141aa31e18d0504b95b2ed4f0da77 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -1190,6 +1190,221 @@ void GpuMatrix::avgPoolBackward(Matrix& outGrad, outGrad.getStride()); } +void GpuMatrix::maxPool3DForward(Matrix& inputMat, + Matrix& maxPoolIdx, + size_t channels, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + CHECK(inputMat.useGpu_) << "Matrix type are not correct"; + + real* inputData = inputMat.getData(); + real* maxPoolIdxData = maxPoolIdx.getData(); + size_t num = inputMat.getHeight(); + size_t width = imgSizeW; + size_t height = imgSizeH; + size_t depth = imgSizeD; + CHECK(depth * height * width * channels == inputMat.getWidth()); + CHECK(height_ == inputMat.getHeight()); + CHECK(width_ == outputD * outputH * outputW * channels); + + hl_maxpool3D_forward(num, + inputData, + channels, + depth, + height, + width, + outputD, + outputH, + outputW, + sizeZ, + sizeY, + sizeX, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + getData(), + maxPoolIdxData, + getStride()); +} + +void GpuMatrix::maxPool3DBackward(Matrix& outGrad, + Matrix& maxPoolIdx, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t paddingD, + size_t paddingH, + size_t paddingW, + real scaleTargets, + real scaleOutput) { + CHECK(outGrad.useGpu_ && maxPoolIdx.useGpu_) << "Matrix type are not equal"; + + real* outDiff = outGrad.getData(); + real* maxPoolIdxData = maxPoolIdx.getData(); + size_t frameNum = getHeight(); + size_t channels = outGrad.getWidth() / outputD / outputH / outputW; + size_t width = imgSizeW; + size_t height = imgSizeH; + size_t depth = imgSizeD; + CHECK(depth * height * width * channels == getWidth()); + CHECK(width_ == depth * width * height * channels); + CHECK(outGrad.getHeight() == maxPoolIdx.getHeight() && + outGrad.getWidth() == maxPoolIdx.getWidth()); + + hl_maxpool3D_backward(frameNum, + outDiff, + channels, + depth, + height, + width, + outputD, + outputH, + outputW, + sizeZ, + sizeY, + sizeX, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + scaleTargets, + scaleOutput, + getData(), + maxPoolIdxData, + outGrad.getStride()); +} + +void GpuMatrix::avgPool3DForward(Matrix& inputMat, + size_t channels, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + CHECK(inputMat.useGpu_) << "Matrix type are not equal"; + + real* inputData = inputMat.getData(); + size_t frameNum = inputMat.getHeight(); + size_t height = imgSizeH; + size_t width = imgSizeW; + size_t depth = imgSizeD; + CHECK(depth * height * width * channels == inputMat.getWidth()); + CHECK(height_ == inputMat.getHeight()); + CHECK(width_ == outputD * outputH * outputW * channels); + + hl_avgpool3D_forward(frameNum, + inputData, + channels, + depth, + height, + width, + outputD, + outputH, + outputW, + sizeZ, + sizeY, + sizeX, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + getData(), + getStride()); +} + +void GpuMatrix::avgPool3DBackward(Matrix& outGrad, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t paddingD, + size_t paddingH, + size_t paddingW, + real scaleTargets, + real scaleOutput) { + CHECK(outGrad.useGpu_) << "Matrix type are not equal"; + + real* outDiff = outGrad.getData(); + size_t frameNum = outGrad.getHeight(); + size_t channels = outGrad.getWidth() / outputD / outputH / outputW; + size_t height = imgSizeH; + size_t width = imgSizeW; + size_t depth = imgSizeD; + CHECK(depth * height * width * channels == width_); + CHECK(height_ == outGrad.getHeight()); + CHECK(outGrad.getWidth() == outputD * outputH * outputW * channels); + + hl_avgpool3D_backward(frameNum, + outDiff, + channels, + depth, + height, + width, + outputD, + outputH, + outputW, + sizeZ, + sizeY, + sizeX, + strideD, + strideH, + strideW, + paddingD, + paddingH, + paddingW, + scaleTargets, + scaleOutput, + getData(), + outGrad.getStride()); +} + void GpuMatrix::maxSequenceForward(Matrix& input, const IVector& sequence, IVector& index) { @@ -1996,6 +2211,276 @@ void CpuMatrix::avgPoolBackward(Matrix& input, } } +void CpuMatrix::maxPool3DForward(Matrix& inputMat, + Matrix& maxPoolIdx, + size_t channels, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + real* inputData = inputMat.getData(); + real* outData = getData(); + real* maxPoolIdxData = maxPoolIdx.getData(); + size_t num = inputMat.getHeight(); + size_t inWidth = imgSizeW; + size_t inHeight = imgSizeH; + size_t inDepth = imgSizeD; + CHECK(inHeight * inWidth * inDepth == inputMat.getWidth() / channels); + CHECK_EQ(num, this->getHeight()); + CHECK_EQ(channels * outputH * outputW * outputD, this->getWidth()); + size_t outStride = getStride(); + + /* initialize the data_ */ + for (size_t i = 0; i < height_; i++) { + for (size_t j = 0; j < width_; j++) { + outData[(i)*outStride + j] = -(real)FLT_MAX; + maxPoolIdxData[(i)*outStride + j] = -1; + } + } + + /* pool max one by one */ + for (size_t n = 0; n < num; ++n) { // frame by frame + if (!isContiguous()) { + outData = getData() + n * outStride; + maxPoolIdxData = maxPoolIdx.getData() + n * outStride; + } + for (size_t c = 0; c < channels; ++c) { // channel by channel + for (size_t pd = 0; pd < outputD; ++pd) { + for (size_t ph = 0; ph < outputH; ++ph) { + for (size_t pw = 0; pw < outputW; ++pw) { + int dstart = pd * strideD - paddingD; + int hstart = ph * strideH - paddingH; + int wstart = pw * strideW - paddingW; + int dend = std::min(dstart + sizeZ, inDepth); + int hend = std::min(hstart + sizeY, inHeight); + int wend = std::min(wstart + sizeX, inWidth); + dstart = std::max(dstart, 0); + hstart = std::max(hstart, 0); + wstart = std::max(wstart, 0); + int maxIdx = -1; + real maxOutData = outData[(pd * outputH + ph) * outputW + pw]; + for (int d = dstart; d < dend; ++d) { + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + if (maxOutData < + inputData[(d * inHeight + h) * inWidth + w]) { + maxOutData = inputData[(d * inHeight + h) * inWidth + w]; + maxIdx = (d * inHeight + h) * inWidth + w; + } + } + } + } + outData[(pd * outputH + ph) * outputW + pw] = maxOutData; + maxPoolIdxData[(pd * outputH + ph) * outputW + pw] = maxIdx; + } + } + } + // compute offset + inputData += inDepth * inHeight * inWidth; + outData += outputD * outputH * outputW; + maxPoolIdxData += outputD * outputH * outputW; + } + } +} + +void CpuMatrix::maxPool3DBackward(Matrix& outGrad, + Matrix& maxPoolIdx, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t paddingD, + size_t paddingH, + size_t paddingW, + real scaleTargets, + real scaleOutput) { + size_t num = getHeight(); + size_t channels = size_t(width_ / imgSizeD / imgSizeH / imgSizeW); + CHECK(maxPoolIdx.getHeight() == outGrad.getHeight() && + maxPoolIdx.getWidth() == outGrad.getWidth()); + + real* tgtGrad = getData(); + real* otGrad = outGrad.getData(); + real* maxPoolIdxData = maxPoolIdx.getData(); + size_t outStride = outGrad.getStride(); + + for (size_t n = 0; n < num; ++n) { + if (!outGrad.isContiguous()) { + otGrad = outGrad.getData() + n * outStride; + maxPoolIdxData = maxPoolIdx.getData() + n * outStride; + } + for (size_t c = 0; c < channels; ++c) { + for (size_t pd = 0; pd < outputD; ++pd) { + for (size_t ph = 0; ph < outputH; ++ph) { + for (size_t pw = 0; pw < outputW; ++pw) { + const size_t index = (pd * outputH + ph) * outputW + pw; + const size_t tgtIdx = static_cast(maxPoolIdxData[index]); + tgtGrad[tgtIdx] = + scaleTargets * tgtGrad[tgtIdx] + scaleOutput * otGrad[index]; + } + } + } + // offset + tgtGrad += imgSizeD * imgSizeH * imgSizeW; + otGrad += outputD * outputH * outputW; + maxPoolIdxData += outputD * outputH * outputW; + } + } +} + +void CpuMatrix::avgPool3DForward(Matrix& input, + size_t channels, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + // The main loop + size_t num = input.getHeight(); + size_t inDepth = imgSizeD; + size_t inHeight = imgSizeH; + size_t inWidth = imgSizeW; + CHECK(inDepth * inHeight * inWidth * channels == input.getWidth()); + CHECK(outputD * outputH * outputW * channels * num == height_ * width_); + real* tgtData = getData(); + real* inData = input.getData(); + + for (size_t n = 0; n < num; ++n) { + if (!isContiguous()) { + tgtData = data_ + n * getStride(); + } + for (size_t c = 0; c < channels; ++c) { + for (size_t pd = 0; pd < outputD; ++pd) { + for (size_t ph = 0; ph < outputH; ++ph) { + for (size_t pw = 0; pw < outputW; ++pw) { + int dstart = pd * strideD - paddingD; + int hstart = ph * strideH - paddingH; + int wstart = pw * strideW - paddingW; + int dend = std::min(dstart + sizeZ, inDepth + paddingD); + int hend = std::min(hstart + sizeY, inHeight + paddingH); + int wend = std::min(wstart + sizeX, inWidth + paddingW); + int poolSize = (dend - dstart) * (hend - hstart) * (wend - wstart); + dstart = std::max(dstart, 0); + hstart = std::max(hstart, 0); + wstart = std::max(wstart, 0); + dend = std::min(dend, static_cast(inDepth)); + hend = std::min(hend, static_cast(inHeight)); + wend = std::min(wend, static_cast(inWidth)); + + CHECK(poolSize); + tgtData[(pd * outputH + ph) * outputW + pw] = 0; // clear + for (int d = dstart; d < dend; ++d) { + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + tgtData[(pd * outputH + ph) * outputW + pw] += + inData[(d * inHeight + h) * inWidth + w]; + } + } + } + tgtData[(pd * outputH + ph) * outputW + pw] /= poolSize; + } + } + } + // compute offset + inData += inDepth * inHeight * inWidth; + tgtData += outputD * outputH * outputW; + } + } +} + +void CpuMatrix::avgPool3DBackward(Matrix& input, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t paddingD, + size_t paddingH, + size_t paddingW, + real scaleTargets, + real scaleOutput) { + size_t num = input.getHeight(); + size_t channels = input.getWidth() / outputD / outputH / outputW; + CHECK(imgSizeD * imgSizeH * imgSizeW * channels == getWidth()); + real* inData = input.getData(); + real* outData = getData(); + + for (size_t n = 0; n < num; ++n) { + if (!input.isContiguous()) { + inData = input.getData() + n * input.getStride(); + } + for (size_t c = 0; c < channels; ++c) { + for (size_t pd = 0; pd < outputD; ++pd) { + for (size_t ph = 0; ph < outputH; ++ph) { + for (size_t pw = 0; pw < outputW; ++pw) { + int dstart = pd * strideD - paddingD; + int hstart = ph * strideH - paddingH; + int wstart = pw * strideW - paddingW; + int dend = std::min(dstart + sizeZ, imgSizeD + paddingD); + int hend = std::min(hstart + sizeY, imgSizeH + paddingH); + int wend = std::min(wstart + sizeX, imgSizeW + paddingW); + int poolSize = (dend - dstart) * (hend - hstart) * (wend - wstart); + dstart = std::max(dstart, 0); + hstart = std::max(hstart, 0); + wstart = std::max(wstart, 0); + dend = std::min(dend, static_cast(imgSizeD)); + hend = std::min(hend, static_cast(imgSizeH)); + wend = std::min(wend, static_cast(imgSizeW)); + CHECK(poolSize); + for (int d = dstart; d < dend; ++d) { + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + outData[(d * imgSizeH + h) * imgSizeW + w] += + inData[(pd * outputH + ph) * outputW + pw] / poolSize; + } + } + } + } + } + } + // offset + outData += imgSizeD * imgSizeH * imgSizeW; + inData += outputD * outputH * outputW; + } + } +} + /** * Input: one or more sequences. Each sequence contains some instances. * Output: output size is the number of input sequences (NOT input instances). diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index cc3a56f279cc6a17104c681a51f1ca907143fc44..431d4e071072317c8fdfdc4f0d13e7cd4e3d062b 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -928,15 +928,102 @@ public: size_t paddingW) { LOG(FATAL) << "Not implemeted"; } - /** - * Input: one or more sequences. Each sequence contains some instances. - * - * Output: output size is the number of input sequences (NOT input - * instances). - * - * output[i] is set to max_input[i]. + * Pooling 3D forward operation, pick out the largest element + * in the sizeX of value */ + virtual void maxPool3DForward(Matrix& inputMat, + Matrix& maxPoolIdx, + size_t channels, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + LOG(FATAL) << "Not implemeted"; + } + + virtual void maxPool3DBackward(Matrix& outGrad, + Matrix& maxPoolIdx, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t paddingD, + size_t paddingH, + size_t paddingW, + real scaleTargets, + real scaleOutput) { + LOG(FATAL) << "Not implemeted"; + } + + virtual void avgPool3DForward(Matrix& input, + size_t channels, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t paddingD, + size_t paddingH, + size_t paddingW) { + LOG(FATAL) << "Not implemeted"; + } + + virtual void avgPool3DBackward(Matrix& input, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t paddingD, + size_t paddingH, + size_t paddingW, + real scaleTargets, + real scaleOutput) { + LOG(FATAL) << "Not implemeted"; + } + + /** + * Input: one or more sequences. Each sequence contains some instances. + * + * Output: output size is the number of input sequences (NOT input + * instances). + * + * output[i] is set to max_input[i]. + */ virtual void maxSequenceForward(Matrix& input, const IVector& sequence, IVector& index) { @@ -1384,6 +1471,82 @@ public: size_t paddingH, size_t paddingW); + void maxPool3DForward(Matrix& inputMat, + Matrix& maxPoolIdx, + size_t channels, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t paddingD, + size_t paddingH, + size_t paddingW); + + void maxPool3DBackward(Matrix& outGrad, + Matrix& maxPoolIdx, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t paddingD, + size_t paddingH, + size_t paddingW, + real scaleTargets, + real scaleOutput); + + void avgPool3DForward(Matrix& input, + size_t channels, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t paddingD, + size_t paddingH, + size_t paddingW); + + void avgPool3DBackward(Matrix& input, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t paddingD, + size_t paddingH, + size_t paddingW, + real scaleTargets, + real scaleOutput); + void maxSequenceForward(Matrix& input, const IVector& sequence, IVector& index); @@ -1575,6 +1738,82 @@ public: size_t paddingH, size_t paddingW); + void maxPool3DForward(Matrix& inputMat, + Matrix& maxPoolIdx, + size_t channels, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t paddingD, + size_t paddingH, + size_t paddingW); + + void maxPool3DBackward(Matrix& outGrad, + Matrix& maxPoolIdx, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t paddingD, + size_t paddingH, + size_t paddingW, + real scaleTargets, + real scaleOutput); + + void avgPool3DForward(Matrix& input, + size_t channels, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t paddingD, + size_t paddingH, + size_t paddingW); + + void avgPool3DBackward(Matrix& input, + size_t imgSizeD, + size_t imgSizeH, + size_t imgSizeW, + size_t outputD, + size_t outputH, + size_t outputW, + size_t sizeZ, + size_t sizeY, + size_t sizeX, + size_t strideD, + size_t strideH, + size_t strideW, + size_t paddingD, + size_t paddingH, + size_t paddingW, + real scaleTargets, + real scaleOutput); + void maxSequenceForward(Matrix& input, const IVector& sequence, IVector& index); diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp index 3abe4484dbc86711c5798b0900a47d09a0d47299..103f06acc57d7a23f019f5e713f6cacf2179e9e0 100644 --- a/paddle/math/tests/test_matrixCompare.cpp +++ b/paddle/math/tests/test_matrixCompare.cpp @@ -1204,6 +1204,399 @@ TEST(Matrix, warpCTC) { } } +void testMaxPool3DFwdBwd(int numSamples, + int channels, + int imgSizeD, + int imgSizeH, + int imgSizeW, + int ksizeD, + int ksizeH, + int ksizeW, + int strideD, + int strideH, + int strideW, + int padD, + int padH, + int padW) { + int outD = outputSize(imgSizeD, ksizeD, padD, strideD, true); + int outH = outputSize(imgSizeH, ksizeH, padH, strideH, true); + int outW = outputSize(imgSizeW, ksizeW, padW, strideW, true); + + int inWidth = channels * imgSizeD * imgSizeH * imgSizeW; + MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false); + MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true); + + int outWidth = channels * outD * outH * outW; + MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false); + MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true); + MatrixPtr maxIdx = CpuMatrix::create(numSamples, outWidth, false, false); + MatrixPtr maxIdxGpu = GpuMatrix::create(numSamples, outWidth, false, true); + + input->randomizeUniform(); + target->randomizeUniform(); + inputGpu->copyFrom(*input); + targetGpu->copyFrom(*target); + + target->maxPool3DForward(*input, + *maxIdx, + channels, + imgSizeD, + imgSizeH, + imgSizeW, + outD, + outH, + outW, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW); + targetGpu->maxPool3DForward(*inputGpu, + *maxIdxGpu, + channels, + imgSizeD, + imgSizeH, + imgSizeW, + outD, + outH, + outW, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW); + MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false); + targetCheck->copyFrom(*targetGpu); + checkMatrixEqual(target, targetCheck); + + MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false); + MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true); + MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false); + MatrixPtr targetGpuGrad = + GpuMatrix::create(numSamples, outWidth, false, true); + + inputGrad->randomizeUniform(); + targetGrad->randomizeUniform(); + inputGpuGrad->copyFrom(*inputGrad); + targetGpuGrad->copyFrom(*targetGrad); + + inputGrad->maxPool3DBackward(*targetGrad, + *maxIdx, + imgSizeD, + imgSizeH, + imgSizeW, + outD, + outH, + outW, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW, + 1.0, + 1.0); + inputGpuGrad->maxPool3DBackward(*targetGpuGrad, + *maxIdxGpu, + imgSizeD, + imgSizeH, + imgSizeW, + outD, + outH, + outW, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW, + 1.0, + 1.0); + MatrixPtr targetBwdCheck = + CpuMatrix::create(numSamples, inWidth, false, false); + targetBwdCheck->copyFrom(*inputGpuGrad); + checkMatrixEqual(inputGrad, targetBwdCheck); +} + +void testAvgPool3DFwdBwd(int numSamples, + int channels, + int imgSizeD, + int imgSizeH, + int imgSizeW, + int ksizeD, + int ksizeH, + int ksizeW, + int strideD, + int strideH, + int strideW, + int padD, + int padH, + int padW) { + int outD = outputSize(imgSizeD, ksizeD, padD, strideD, true); + int outH = outputSize(imgSizeH, ksizeH, padH, strideH, true); + int outW = outputSize(imgSizeW, ksizeW, padW, strideW, true); + + int inWidth = imgSizeD * imgSizeH * imgSizeW * channels; + MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false); + MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true); + + int outWidth = channels * outD * outH * outW; + MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false); + MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true); + + input->randomizeUniform(); + target->randomizeUniform(); + inputGpu->copyFrom(*input); + targetGpu->copyFrom(*target); + + target->avgPool3DForward(*input, + channels, + imgSizeD, + imgSizeH, + imgSizeW, + outD, + outH, + outW, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW); + + targetGpu->avgPool3DForward(*inputGpu, + channels, + imgSizeD, + imgSizeH, + imgSizeW, + outD, + outH, + outW, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW); + + TensorCheckErr(*target, *targetGpu); + + MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false); + MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true); + MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false); + MatrixPtr targetGpuGrad = + GpuMatrix::create(numSamples, outWidth, false, true); + + inputGrad->randomizeUniform(); + targetGrad->randomizeUniform(); + inputGpuGrad->copyFrom(*inputGrad); + targetGpuGrad->copyFrom(*targetGrad); + + inputGrad->avgPool3DBackward(*targetGrad, + imgSizeD, + imgSizeH, + imgSizeW, + outD, + outH, + outW, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW, + 1.0, + 1.0); + + inputGpuGrad->avgPool3DBackward(*targetGpuGrad, + imgSizeD, + imgSizeH, + imgSizeW, + outD, + outH, + outW, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW, + 1.0, + 1.0); + TensorCheckErr(*inputGrad, *inputGpuGrad); +} + +// TODO(yi): I noticed many such blindly combinatorial tests in this +// file. They are no help to locate defects at all. +TEST(Matrix, Pool3DFwdBwd) { + for (auto numSamples : {1, 3}) { + for (auto channels : {3}) { + for (auto imgSizeD : {9, 16}) { + for (auto imgSizeH : {9, 32}) { + for (auto imgSizeW : {9, 32}) { + for (auto sizeX : {3}) { + for (auto sizeY : {3}) { + for (auto sizeZ : {3}) { + for (auto sD : {2}) { + for (auto sH : {2}) { + for (auto sW : {2}) { + for (auto pD : {0, (sizeZ - 1) / 2}) { + for (auto pH : {0, (sizeY - 1) / 2}) { + for (auto pW : {0, (sizeX - 1) / 2}) { + VLOG(3) << " numSamples=" << numSamples + << " channels=" << channels + << " imgSizeD=" << imgSizeD + << " imgSizeH=" << imgSizeH + << " imgSizeW=" << imgSizeW + << " sizeX=" << sizeX + << " sizeY=" << sizeY + << " sizeZ=" << sizeZ << " strideD=" << sD + << " strideH=" << sH << " strideW=" << sW + << " padingD=" << pD << " padingH=" << pH + << " padingW=" << pW; + + testMaxPool3DFwdBwd(numSamples, + channels, + imgSizeD, + imgSizeH, + imgSizeW, + sizeX, + sizeY, + sizeZ, + sD, + sH, + sW, + pD, + pH, + pW); + testAvgPool3DFwdBwd(numSamples, + channels, + imgSizeD, + imgSizeH, + imgSizeW, + sizeX, + sizeY, + sizeZ, + sD, + sH, + sW, + pD, + pH, + pW); + } + } + } + } + } + } + } + } + } + } + } + } + } + } + + // for (auto numSamples : {1, 3}) { + // for (auto channels : {1, 3}) { + // for (auto imgSizeD : {9,16}) { + // for (auto imgSizeH : {9, 32}) { + // for (auto imgSizeW : {9, 32}) { + // for (auto sizeX : {2, 3}) { + // for (auto sizeY : {2, 3}) { + // for (auto sizeZ : {2,3}){ + // for (auto sD : {1, 2}) { + // for (auto sH : {1, 2}) { + // for (auto sW : {1, 2}) { + // for (auto pD : {0, (sizeZ - 1) / 2}){ + // for (auto pH : {0, (sizeY - 1) / 2}) { + // for (auto pW : {0, (sizeX - 1) / 2}) { + // VLOG(3) << " numSamples=" << numSamples + // << " channels=" << channels + // << " imgSizeD=" << imgSizeD + // << " imgSizeH=" << imgSizeH + // << " imgSizeW=" << imgSizeW + // << " sizeX=" << sizeX + // << " sizeY=" << sizeY + // << " sizeZ=" << sizeZ + // << " strideD=" << sD + // << " strideH=" << sH + // << " strideW=" << sW + // << " padingD=" << pD + // << " padingH=" << pH + // << " padingW=" << pW; + // + // testMaxPool3DFwdBwd(numSamples, + // channels, + // imgSizeD, + // imgSizeH, + // imgSizeW, + // sizeX, + // sizeY, + // sizeZ, + // sD, + // sH, + // sW, + // pD, + // pH, + // pW); + // testAvgPool3DFwdBwd(numSamples, + // channels, + // imgSizeD, + // imgSizeH, + // imgSizeW, + // sizeX, + // sizeY, + // sizeZ, + // sD, + // sH, + // sW, + // pD, + // pH, + // pW); + // } + // } + // } + // } + // } + // } + // } + // } + // } + // } + // } + // } + // } + // } +} + void testMatrixCol2Vol(int depth, int height, int width) { int channel = 3; int filterX = 3, filterY = 4, filterZ = 5; @@ -1303,6 +1696,5 @@ TEST(Matrix, col2Vol) { } } } -/////// #endif diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 36b7803f073950437c938b54e6b5677b0c359151..4ddf023780c704cb10c51ee9e5d7cb63420f9d73 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -133,6 +133,12 @@ message PoolConfig { // if not set, use padding optional uint32 padding_y = 13; + + optional uint32 size_z = 14 [ default = 1 ]; + optional uint32 stride_z = 15 [ default = 1 ]; + optional uint32 output_z = 16 [ default = 1 ]; + optional uint32 img_size_z = 17 [ default = 1 ]; + optional uint32 padding_z = 18 [ default = 1 ]; } message SppConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 049efe6cfc1dcfa77504f7cd6a5fbc6bf610c3f0..152a56190c1ffddbf9590ed8f71308ceb88403f4 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -938,6 +938,31 @@ class Pool(Cfg): self.add_keys(locals()) +@config_class +class Pool3d(Cfg): + def __init__( + self, + pool_type, + channels, + size_x, + size_y=None, + size_z=None, + start=None, + stride=None, # 1 by defalut in protobuf + stride_y=None, + stride_z=None, + padding=None, # 0 by defalut in protobuf + padding_y=None, + padding_z=None): + self.add_keys(locals()) + self.filter_size_y = size_y if size_y else size_x + self.filter_size_z = size_z if size_z else size_x + self.padding_y = padding_y if padding_y else padding + self.padding_z = padding_z if padding_z else padding + self.stride_y = stride_y if stride_y else stride + self.stride_z = stride_z if stride_z else stride + + @config_class class SpatialPyramidPool(Cfg): def __init__(self, pool_type, pyramid_height, channels): @@ -1253,6 +1278,45 @@ def parse_pool(pool, input_layer_name, pool_conf, ceil_mode): pool_conf.stride_y, not ceil_mode) +def parse_pool3d(pool, input_layer_name, pool_conf, ceil_mode): + pool_conf.pool_type = pool.pool_type + config_assert(pool.pool_type in ['max-projection', 'avg-projection'], + "pool-type %s is not in " + "['max-projection', 'avg-projection']" % pool.pool_type) + + pool_conf.channels = pool.channels + + pool_conf.size_x = pool.size_x + pool_conf.stride = pool.stride + pool_conf.padding = pool.padding + + pool_conf.size_y = default(pool.size_y, pool_conf.size_x) + pool_conf.size_z = default(pool.size_z, pool_conf.size_x) + pool_conf.stride_y = default(pool.stride_y, pool_conf.stride) + pool_conf.stride_z = default(pool.stride_z, pool_conf.stride) + pool_conf.padding_y = default(pool.padding_y, pool_conf.padding) + pool_conf.padding_z = default(pool.padding_z, pool_conf.padding) + + pool_conf.img_size, pool_conf.img_size_y, pool_conf.img_size_z = \ + get_img3d_size(input_layer_name, pool.channels) + + config_assert(not pool.start, "start is deprecated in pooling.") + + if pool.padding is not None: + pool_conf.padding = pool.padding + pool_conf.padding_y = default(pool.padding_y, pool_conf.padding) + pool_conf.padding_z = default(pool.padding_z, pool_conf.padding) + pool_conf.output_x = cnn_output_size(pool_conf.img_size, pool_conf.size_x, + pool_conf.padding, pool_conf.stride, + not ceil_mode) + pool_conf.output_y = cnn_output_size(pool_conf.img_size_y, pool_conf.size_y, + pool_conf.padding_y, + pool_conf.stride_y, not ceil_mode) + pool_conf.output_z = cnn_output_size(pool_conf.img_size_z, pool_conf.size_z, + pool_conf.padding_z, + pool_conf.stride_z, not ceil_mode) + + def parse_spp(spp, input_layer_name, spp_conf): parse_image(spp, input_layer_name, spp_conf.image_conf) spp_conf.pool_type = spp.pool_type @@ -1897,9 +1961,9 @@ class DataLayer(LayerBase): def __init__(self, name, size, + depth=None, height=None, width=None, - depth=None, device=None): super(DataLayer, self).__init__( name, 'data', size, inputs=[], device=device) @@ -2215,6 +2279,35 @@ class PoolLayer(LayerBase): pool_conf.channels) +@config_layer('pool3d') +class Pool3DLayer(LayerBase): + def __init__(self, name, inputs, ceil_mode=True, **xargs): + super(Pool3DLayer, self).__init__( + name, 'pool3d', 0, inputs=inputs, **xargs) + for input_index in xrange(len(self.inputs)): + input_layer = self.get_input_layer(input_index) + pool_conf = self.config.inputs[input_index].pool_conf + parse_pool3d(self.inputs[input_index].pool, input_layer.name, + pool_conf, ceil_mode) + self.set_cnn_layer(name, pool_conf.output_z, pool_conf.output_y, + pool_conf.output_x, pool_conf.channels) + + def set_cnn_layer(self, + input_layer_name, + depth, + height, + width, + channels, + is_print=True): + size = depth * height * width * channels + self.set_layer_size(size) + self.set_layer_height_width(height, width) + self.set_layer_depth(depth) + if is_print: + print("output for %s: c = %d, d = %d, h = %d, w = %d, size = %d" % + (input_layer_name, channels, depth, height, width, size)) + + @config_layer('spp') class SpatialPyramidPoolLayer(LayerBase): def __init__(self, name, inputs, **xargs): diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 3de2f2f01da3778b3ae86f22e5c39f5193c7ccce..fdf4136aa512b09ab9a1a6d9cf387229d7984804 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -138,6 +138,7 @@ __all__ = [ 'slice_projection', 'seq_slice_layer', 'kmax_sequence_score_layer', + 'img_pool3d_layer', 'scale_shift_layer', 'img_conv3d_layer', ] @@ -168,6 +169,7 @@ class LayerType(object): EXCONVTRANS_LAYER = 'exconvt' CUDNNCONV_LAYER = 'cudnn_conv' POOL_LAYER = 'pool' + POOL3D_LAYER = 'pool3d' BATCH_NORM_LAYER = 'batch_norm' NORM_LAYER = 'norm' SUM_TO_ONE_NORM_LAYER = 'sum_to_one_norm' @@ -900,7 +902,7 @@ def mixed_layer(size=0, @layer_support() -def data_layer(name, size, height=None, width=None, depth=None, +def data_layer(name, size, depth=None, height=None, width=None, layer_attr=None): """ Define DataLayer For NeuralNetwork. @@ -938,8 +940,8 @@ def data_layer(name, size, height=None, width=None, depth=None, num_filters = None if height is not None and width is not None: num_filters = size / (width * height * depth) - assert num_filters * width * height*depth == size, \ - "size=%s width=%s height=%s depth=%s" % (size, width, height, depth) + assert num_filters * width * height * depth == size, \ + "size=%s width=%s height=%s depth=%s" % (size, width, height, depth) return LayerOutput(name, LayerType.DATA, size=size, num_filters=num_filters) @@ -2663,6 +2665,146 @@ def img_pool_layer(input, size=l.config.size) +@wrap_name_default("pool3d") +@layer_support() +def img_pool3d_layer(input, + pool_size, + name=None, + num_channels=None, + pool_type=None, + stride=1, + padding=0, + layer_attr=None, + pool_size_y=None, + stride_y=None, + padding_y=None, + pool_size_z=None, + stride_z=None, + padding_z=None, + ceil_mode=True): + """ + Image pooling Layer. + + The details of pooling layer, please refer ufldl's pooling_ . + + .. _pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/ + + - ceil_mode=True: + + .. math:: + + w = 1 + int(ceil(input\_width + 2 * padding - pool\_size) / float(stride)) + h = 1 + int(ceil(input\_height + 2 * padding\_y - pool\_size\_y) / float(stride\_y)) + d = 1 + int(ceil(input\_depth + 2 * padding\_z - pool\_size\_z) / float(stride\_z)) + + - ceil_mode=False: + + .. math:: + + w = 1 + int(floor(input\_width + 2 * padding - pool\_size) / float(stride)) + h = 1 + int(floor(input\_height + 2 * padding\_y - pool\_size\_y) / float(stride\_y)) + d = 1 + int(floor(input\_depth + 2 * padding\_z - pool\_size\_z) / float(stride\_z)) + + The example usage is: + + .. code-block:: python + + maxpool = img_pool3d_layer(input=conv, + pool_size=3, + num_channels=8, + stride=1, + padding=1, + pool_type=MaxPooling()) + + :param padding: pooling padding width. + :type padding: int|tuple|list + :param name: name of pooling layer + :type name: basestring. + :param input: layer's input + :type input: LayerOutput + :param pool_size: pooling window width + :type pool_size: int|tuple|list + :param num_channels: number of input channel. + :type num_channels: int + :param pool_type: pooling type. MaxPooling or AvgPooling. Default is + MaxPooling. + :type pool_type: BasePoolingType + :param stride: stride width of pooling. + :type stride: int|tuple|list + :param layer_attr: Extra Layer attribute. + :type layer_attr: ExtraLayerAttribute + :param ceil_mode: Wether to use ceil mode to calculate output height and with. + Defalut is True. If set false, Otherwise use floor. + + :type ceil_mode: bool + :return: LayerOutput object. + :rtype: LayerOutput + """ + if num_channels is None: + assert input.num_filters is not None + num_channels = input.num_filters + + if pool_type is None: + pool_type = MaxPooling() + elif isinstance(pool_type, AvgPooling): + pool_type.name = 'avg' + + type_name = pool_type.name + '-projection' \ + if ( + isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \ + else pool_type.name + + if isinstance(pool_size, collections.Sequence): + assert len(pool_size) == 3 + pool_size, pool_size_y, pool_size_z = pool_size + else: + pool_size_y = pool_size + pool_size_z = pool_size + + if isinstance(stride, collections.Sequence): + assert len(stride) == 3 + stride, stride_y, stride_z = stride + else: + stride_y = stride + stride_z = stride + + if isinstance(padding, collections.Sequence): + assert len(padding) == 3 + padding, padding_y, padding_y = padding + else: + padding_y = padding + padding_z = padding + + l = Layer( + name=name, + type=LayerType.POOL3D_LAYER, + inputs=[ + Input( + input.name, + pool=Pool3d( + pool_type=type_name, + channels=num_channels, + size_x=pool_size, + start=None, + stride=stride, + padding=padding, + size_y=pool_size_y, + stride_y=stride_y, + padding_y=padding_y, + size_z=pool_size_z, + stride_z=stride_z, + padding_z=padding_z)) + ], + ceil_mode=ceil_mode, + **ExtraLayerAttribute.to_kwargs(layer_attr)) + return LayerOutput( + name, + LayerType.POOL_LAYER, + parents=[input], + num_filters=num_channels, + size=l.config.size) + + @wrap_name_default("spp") @layer_support() def spp_layer(input, diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index 7982d34bc5c0d4d99b0a8468ab0db86134764eab..7b132c23d68d66bc2430839533f04f1b80b50977 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -9,6 +9,7 @@ test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer test_kmax_seq_socre_layer test_seq_select_layers test_scale_shift_layer -test_seq_slice_layer test_cross_entropy_over_beam test_conv3d_layer test_deconv3d_layer) +test_seq_slice_layer test_cross_entropy_over_beam test_pooling3D_layer +test_conv3d_layer test_deconv3d_layer) export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_pooling3D_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_pooling3D_layer.protostr new file mode 100644 index 0000000000000000000000000000000000000000..8eb98593f6f692a445cf5088e101e9da3763b41d --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_pooling3D_layer.protostr @@ -0,0 +1,123 @@ +type: "nn" +layers { + name: "data_2d" + type: "data" + size: 6000 + active_type: "" + height: 20 + width: 10 +} +layers { + name: "pool___2d" + type: "pool" + size: 840 + active_type: "" + inputs { + input_layer_name: "data_2d" + pool_conf { + pool_type: "avg-projection" + channels: 30 + size_x: 5 + stride: 3 + output_x: 4 + img_size: 10 + padding: 1 + size_y: 5 + stride_y: 3 + output_y: 7 + img_size_y: 20 + padding_y: 1 + } + } + height: 7 + width: 4 +} +layers { + name: "data_3d_1" + type: "data" + size: 60000 + active_type: "" + height: 20 + width: 10 + depth: 10 +} +layers { + name: "pool_3d_1" + type: "pool3d" + size: 3360 + active_type: "" + inputs { + input_layer_name: "data_3d_1" + pool_conf { + pool_type: "avg-projection" + channels: 30 + size_x: 5 + stride: 3 + output_x: 4 + img_size: 10 + padding: 1 + size_y: 5 + stride_y: 3 + output_y: 7 + img_size_y: 20 + padding_y: 1 + size_z: 5 + stride_z: 3 + output_z: 4 + img_size_z: 10 + padding_z: 1 + } + } + height: 7 + width: 4 + depth: 4 +} +layers { + name: "pool_3d_2" + type: "pool3d" + size: 3360 + active_type: "" + inputs { + input_layer_name: "data_3d_1" + pool_conf { + pool_type: "max-projection" + channels: 30 + size_x: 5 + stride: 3 + output_x: 4 + img_size: 10 + padding: 1 + size_y: 5 + stride_y: 3 + output_y: 7 + img_size_y: 20 + padding_y: 1 + size_z: 5 + stride_z: 3 + output_z: 4 + img_size_z: 10 + padding_z: 1 + } + } + height: 7 + width: 4 + depth: 4 +} +input_layer_names: "data_2d" +output_layer_names: "pool___2d" +output_layer_names: "pool_3d_1" +output_layer_names: "pool_3d_2" +sub_models { + name: "root" + layer_names: "data_2d" + layer_names: "pool___2d" + layer_names: "data_3d_1" + layer_names: "pool_3d_1" + layer_names: "pool_3d_2" + input_layer_names: "data_2d" + output_layer_names: "pool___2d" + output_layer_names: "pool_3d_1" + output_layer_names: "pool_3d_2" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_pooling3D_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_pooling3D_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..0dbb921d41986e711d5b8b31caab1f8b6bdc47b8 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_pooling3D_layer.py @@ -0,0 +1,38 @@ +from paddle.trainer_config_helpers import * + +settings(batch_size=100, learning_rate=1e-5) + +data_2d = data_layer(name='data_2d', size=6000, height=20, width=10) + +pool_2d = img_pool_layer( + name="pool___2d", + input=data_2d, + num_channels=30, + pool_size=5, + stride=3, + padding=1, + pool_type=AvgPooling()) +outputs(pool_2d) + +data_3d = data_layer( + name='data_3d_1', size=60000, depth=10, height=20, width=10) + +pool_3d_1 = img_pool3d_layer( + name="pool_3d_1", + input=data_3d, + num_channels=30, + pool_size=5, + stride=3, + padding=1, + pool_type=AvgPooling()) +outputs(pool_3d_1) + +pool_3d_2 = img_pool3d_layer( + name="pool_3d_2", + input=data_3d, + num_channels=30, + pool_size=[5, 5, 5], + stride=[3, 3, 3], + padding=[1, 1, 1], + pool_type=MaxPooling()) +outputs(pool_3d_2)