提交 fcad0a3a 编写于 作者: C chengduo 提交者: GitHub

Merge pull request #3504 from chengduoZH/Add_3DPooling

Add 3DPoolingLayer
...@@ -173,6 +173,96 @@ extern void hl_avgpool_backward(const int frameCnt, ...@@ -173,6 +173,96 @@ extern void hl_avgpool_backward(const int frameCnt,
real* backGrad, real* backGrad,
const int outStride); const int outStride);
/**
 * @brief   Declaration of the 3D max-pooling forward routine
 *          (no definition in this header).
 *
 * NOTE(review): parameter meanings below are inferred from the parameter
 * names only -- confirm against the implementation.
 *
 * @param[in]   frameCnt              presumably the number of samples.
 * @param[in]   inputData             read-only input buffer.
 * @param[in]   channels              presumably the channel count.
 * @param[in]   depth/height/width    presumably the input volume extents.
 * @param[in]   pooledD/H/W           presumably the pooled output extents.
 * @param[in]   sizeZ/Y/X             presumably the pooling window extents.
 * @param[in]   strideD/H/W           presumably the per-axis strides.
 * @param[in]   paddingD/H/W          presumably the per-axis paddings.
 * @param[out]  tgtData               writable output buffer.
 * @param[out]  maxPoolIdxData        writable buffer; presumably receives the
 *                                    index of each selected maximum.
 * @param[in]   tgtStride             presumably the row stride of tgtData.
 */
extern void hl_maxpool3D_forward(const int frameCnt,
const real* inputData,
const int channels,
const int depth,
const int height,
const int width,
const int pooledD,
const int pooledH,
const int pooledW,
const int sizeZ,
const int sizeY,
const int sizeX,
const int strideD,
const int strideH,
const int strideW,
const int paddingD,
const int paddingH,
const int paddingW,
real* tgtData,
real* maxPoolIdxData,
const int tgtStride);
/**
 * @brief   Declaration of the 3D max-pooling backward routine
 *          (no definition in this header).
 *
 * NOTE(review): parameter meanings below are inferred from the parameter
 * names only -- confirm against the implementation.
 *
 * @param[in]   frameCnt              presumably the number of samples.
 * @param[in]   outGrad               read-only gradient buffer.
 * @param[in]   channels              presumably the channel count.
 * @param[in]   depth/height/width    presumably the input volume extents.
 * @param[in]   pooledD/H/W           presumably the pooled output extents.
 * @param[in]   sizeZ/Y/X             presumably the pooling window extents.
 * @param[in]   strideD/H/W           presumably the per-axis strides.
 * @param[in]   paddingD/H/W          presumably the per-axis paddings.
 * @param[in]   scaleA, scaleB        scale factors; exact roles are not
 *                                    visible here -- TODO confirm.
 * @param[out]  targetGrad            writable gradient buffer.
 * @param[in]   maxPoolIdxData        presumably the indices recorded by the
 *                                    forward pass (declared non-const).
 * @param[in]   outStride             presumably the row stride of outGrad.
 */
extern void hl_maxpool3D_backward(const int frameCnt,
const real* outGrad,
const int channels,
const int depth,
const int height,
const int width,
const int pooledD,
const int pooledH,
const int pooledW,
const int sizeZ,
const int sizeY,
const int sizeX,
const int strideD,
const int strideH,
const int strideW,
const int paddingD,
const int paddingH,
const int paddingW,
real scaleA,
real scaleB,
real* targetGrad,
real* maxPoolIdxData,
const int outStride);
/**
 * @brief   Declaration of the 3D average-pooling forward routine
 *          (no definition in this header).
 *
 * NOTE(review): parameter meanings below are inferred from the parameter
 * names only -- confirm against the implementation.
 *
 * @param[in]   frameCnt              presumably the number of samples.
 * @param[in]   inputData             read-only input buffer.
 * @param[in]   channels              presumably the channel count.
 * @param[in]   depth/height/width    presumably the input volume extents.
 * @param[in]   pooledD/H/W           presumably the pooled output extents.
 * @param[in]   sizeZ/Y/X             presumably the pooling window extents.
 * @param[in]   strideD/H/W           presumably the per-axis strides.
 * @param[in]   paddingD/H/W          presumably the per-axis paddings.
 * @param[out]  tgtData               writable output buffer.
 * @param[in]   tgtStride             presumably the row stride of tgtData.
 */
extern void hl_avgpool3D_forward(const int frameCnt,
const real* inputData,
const int channels,
const int depth,
const int height,
const int width,
const int pooledD,
const int pooledH,
const int pooledW,
const int sizeZ,
const int sizeY,
const int sizeX,
const int strideD,
const int strideH,
const int strideW,
const int paddingD,
const int paddingH,
const int paddingW,
real* tgtData,
const int tgtStride);
/**
 * @brief   Declaration of the 3D average-pooling backward routine
 *          (no definition in this header).
 *
 * The padding parameters are spelled `const int` like every other integer
 * parameter here and like the inline stub of the same name; top-level const
 * on a by-value parameter does not change the function's type, so this is
 * purely a consistency fix.
 *
 * NOTE(review): parameter meanings below are inferred from the parameter
 * names only -- confirm against the implementation.
 *
 * @param[in]   frameCnt              presumably the number of samples.
 * @param[in]   outGrad               read-only gradient buffer.
 * @param[in]   channels              presumably the channel count.
 * @param[in]   depth/height/width    presumably the input volume extents.
 * @param[in]   pooledD/H/W           presumably the pooled output extents.
 * @param[in]   sizeZ/Y/X             presumably the pooling window extents.
 * @param[in]   strideD/H/W           presumably the per-axis strides.
 * @param[in]   paddingD/H/W          presumably the per-axis paddings.
 * @param[in]   scaleA, scaleB        scale factors; exact roles are not
 *                                    visible here -- TODO confirm.
 * @param[out]  backGrad              writable gradient buffer.
 * @param[in]   outStride             presumably the row stride of outGrad.
 */
extern void hl_avgpool3D_backward(const int frameCnt,
                                  const real* outGrad,
                                  const int channels,
                                  const int depth,
                                  const int height,
                                  const int width,
                                  const int pooledD,
                                  const int pooledH,
                                  const int pooledW,
                                  const int sizeZ,
                                  const int sizeY,
                                  const int sizeX,
                                  const int strideD,
                                  const int strideH,
                                  const int strideW,
                                  const int paddingD,
                                  const int paddingH,
                                  const int paddingW,
                                  real scaleA,
                                  real scaleB,
                                  real* backGrad,
                                  const int outStride);
/** /**
* @brief Bilinear interpolation forward. * @brief Bilinear interpolation forward.
* *
...@@ -275,4 +365,4 @@ extern void hl_maxout_backward(real* inGrad, ...@@ -275,4 +365,4 @@ extern void hl_maxout_backward(real* inGrad,
size_t featLen, size_t featLen,
size_t groups); size_t groups);
#endif /* HL_CNN_H_ */ #endif // HL_CNN_H_
...@@ -87,6 +87,96 @@ inline void hl_avgpool_backward(const int frameCnt, ...@@ -87,6 +87,96 @@ inline void hl_avgpool_backward(const int frameCnt,
real* backGrad, real* backGrad,
const int outStride) {} const int outStride) {}
// Stub of hl_maxpool3D_forward with an empty body: calling it does nothing
// and leaves tgtData / maxPoolIdxData untouched.
// NOTE(review): presumably the stand-in used when the real implementation is
// not compiled in -- confirm from the enclosing header.
inline void hl_maxpool3D_forward(const int frameCnt,
const real* inputData,
const int channels,
const int depth,
const int height,
const int width,
const int pooledD,
const int pooledH,
const int pooledW,
const int sizeZ,
const int sizeY,
const int sizeX,
const int strideD,
const int strideH,
const int strideW,
const int paddingD,
const int paddingH,
const int paddingW,
real* tgtData,
real* maxPoolIdxData,
const int tgtStride) {}
// Stub of hl_maxpool3D_backward with an empty body: calling it does nothing
// and leaves targetGrad untouched.
// NOTE(review): presumably the stand-in used when the real implementation is
// not compiled in -- confirm from the enclosing header.
inline void hl_maxpool3D_backward(const int frameCnt,
const real* outGrad,
const int channels,
const int depth,
const int height,
const int width,
const int pooledD,
const int pooledH,
const int pooledW,
const int sizeZ,
const int sizeY,
const int sizeX,
const int strideD,
const int strideH,
const int strideW,
const int paddingD,
const int paddingH,
const int paddingW,
real scaleA,
real scaleB,
real* targetGrad,
real* maxPoolIdxData,
const int outStride) {}
// Stub of hl_avgpool3D_forward with an empty body: calling it does nothing
// and leaves tgtData untouched.
// NOTE(review): presumably the stand-in used when the real implementation is
// not compiled in -- confirm from the enclosing header.
inline void hl_avgpool3D_forward(const int frameCnt,
const real* inputData,
const int channels,
const int depth,
const int height,
const int width,
const int pooledD,
const int pooledH,
const int pooledW,
const int sizeZ,
const int sizeY,
const int sizeX,
const int strideD,
const int strideH,
const int strideW,
const int paddingD,
const int paddingH,
const int paddingW,
real* tgtData,
const int tgtStride) {}
// Stub of hl_avgpool3D_backward with an empty body: calling it does nothing
// and leaves backGrad untouched.
// NOTE(review): presumably the stand-in used when the real implementation is
// not compiled in -- confirm from the enclosing header.
inline void hl_avgpool3D_backward(const int frameCnt,
const real* outGrad,
const int channels,
const int depth,
const int height,
const int width,
const int pooledD,
const int pooledH,
const int pooledW,
const int sizeZ,
const int sizeY,
const int sizeX,
const int strideD,
const int strideH,
const int strideW,
const int paddingD,
const int paddingH,
const int paddingW,
real scaleA,
real scaleB,
real* backGrad,
const int outStride) {}
inline void hl_bilinear_forward(const real* inData, inline void hl_bilinear_forward(const real* inData,
const size_t inImgH, const size_t inImgH,
const size_t inImgW, const size_t inImgW,
......
此差异已折叠。
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Pool3DLayer.h"
#include "PoolProjectionLayer.h"
#include "paddle/utils/Logging.h"
namespace paddle {
REGISTER_LAYER(pool3d, Pool3DLayer);
/**
 * Initialises the layer from its configuration.
 *
 * Runs the base-class initialisation, checks that exactly one input is
 * configured, and caches that input's PoolConfig values (pool type, channel
 * count, window sizes, strides, image sizes, paddings and output sizes) in
 * the corresponding members.
 *
 * @return false if Layer::init reports failure, true otherwise.
 */
bool Pool3DLayer::init(const LayerMap& layerMap,
                       const ParameterMap& parameterMap) {
  // Propagate a failed base-class initialisation instead of discarding it.
  if (!Layer::init(layerMap, parameterMap)) {
    return false;
  }

  /* the size of inputs for pool-layer is 1 */
  CHECK_EQ(config_.inputs_size(), 1);

  const PoolConfig& conf = config_.inputs(0).pool_conf();
  poolType_ = conf.pool_type();
  channels_ = conf.channels();

  // Pooling window: x -> width, y -> height, z -> depth.
  sizeX_ = conf.size_x();
  sizeY_ = conf.size_y();
  sizeZ_ = conf.size_z();

  strideW_ = conf.stride();
  strideH_ = conf.stride_y();
  strideD_ = conf.stride_z();

  imgSizeW_ = conf.img_size();
  imgSizeH_ = conf.img_size_y();
  imgSizeD_ = conf.img_size_z();

  paddingW_ = conf.padding();
  paddingH_ = conf.padding_y();
  paddingD_ = conf.padding_z();

  outputW_ = conf.output_x();
  outputH_ = conf.output_y();
  outputD_ = conf.output_z();

  return true;
}
/**
 * Recomputes the output extents from the cached image size, window size,
 * padding and stride of each axis (always passing `false` as the last
 * argument of outputSize), records them as the frame depth/height/width of
 * the layer output, and returns outputD * outputH * outputW * channels.
 */
size_t Pool3DLayer::getSize() {
  CHECK_EQ(inputLayers_.size(), 1UL);

  outputD_ = outputSize(imgSizeD_, sizeZ_, paddingD_, strideD_, false);
  outputH_ = outputSize(imgSizeH_, sizeY_, paddingH_, strideH_, false);
  outputW_ = outputSize(imgSizeW_, sizeX_, paddingW_, strideW_, false);

  // The product is formed in int arithmetic and converted afterwards.
  const size_t totalSize = outputD_ * outputH_ * outputW_ * channels_;

  getOutput().setFrameHeight(outputH_);
  getOutput().setFrameWidth(outputW_);
  getOutput().setFrameDepth(outputD_);

  return totalSize;
}
/**
 * Forward pass: sizes the output (and the max-index scratch matrix) to
 * batch x getSize(), then dispatches to avgPool3DForward or maxPool3DForward
 * depending on poolType_ ("avg" / "max"); any other type is a fatal error.
 * The activation is applied afterwards.
 */
void Pool3DLayer::forward(PassType passType) {
  Layer::forward(passType);

  const MatrixPtr& input = inputLayers_[0]->getOutputValue();
  const size_t numSamples = input->getHeight();
  const size_t outputWidth = getSize();

  resetOutput(numSamples, outputWidth);
  // The index matrix is (re)sized for every pool type, not only for "max".
  Matrix::resizeOrCreate(maxPoolIdx_, numSamples, outputWidth, false, useGpu_);

  const MatrixPtr output = getOutputValue();
  const bool isAvg = (poolType_ == "avg");
  const bool isMax = !isAvg && (poolType_ == "max");

  if (!isAvg && !isMax) {
    LOG(FATAL) << "Unknown pool type: " << poolType_;
  }

  if (isAvg) {
    output->avgPool3DForward(*input,
                             channels_,
                             imgSizeD_, imgSizeH_, imgSizeW_,
                             outputD_, outputH_, outputW_,
                             sizeZ_, sizeY_, sizeX_,
                             strideD_, strideH_, strideW_,
                             paddingD_, paddingH_, paddingW_);
  } else if (isMax) {
    output->maxPool3DForward(*input,
                             *maxPoolIdx_,
                             channels_,
                             imgSizeD_, imgSizeH_, imgSizeW_,
                             outputD_, outputH_, outputW_,
                             sizeZ_, sizeY_, sizeX_,
                             strideD_, strideH_, strideW_,
                             paddingD_, paddingH_, paddingW_);
  }

  forwardActivation();
}
void Pool3DLayer::backward(const UpdateCallback& callback) {
backwardActivation();
(void)callback;
if (NULL == getInputGrad(0)) return;
MatrixPtr inMat = inputLayers_[0]->getOutputValue();
MatrixPtr inGradMat = inputLayers_[0]->getOutputGrad();
MatrixPtr outMat = getOutputValue();
MatrixPtr outGradMat = getOutputGrad();
if (poolType_ == "avg") {
inGradMat->avgPool3DBackward(*outGradMat,
imgSizeD_,
imgSizeH_,
imgSizeW_,
outputD_,
outputH_,
outputW_,
sizeZ_,
sizeY_,
sizeZ_,
strideD_,
strideH_,
strideW_,
paddingD_,
paddingH_,
paddingW_,
1.0,
1.0);
} else if (poolType_ == "max") {
inGradMat->maxPool3DBackward(*outGradMat,
*maxPoolIdx_,
imgSizeD_,
imgSizeH_,
imgSizeW_,
outputD_,
outputH_,
outputW_,
sizeZ_,
sizeY_,
sizeZ_,
strideD_,
strideH_,
strideW_,
paddingD_,
paddingH_,
paddingW_,
1.0,
1.0);
} else {
LOG(FATAL) << "Unknown pool type: " << poolType_;
}
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "Layer.h"
#include "paddle/math/MathUtils.h"
#include "paddle/math/Matrix.h"
namespace paddle {
/**
* @brief Basic parent layer of pooling
* Pools the input within regions
*/
// Layer that pools its single input over depth, height and width.
// The pool type string selects the operation at run time.
class Pool3DLayer : public Layer {
public:
explicit Pool3DLayer(const LayerConfig& config) : Layer(config) {}
~Pool3DLayer() {}
// Caches the pooling configuration of the single input.
bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;
void forward(PassType passType) override;
void backward(const UpdateCallback& callback) override;
// Not declared `override`.
// NOTE(review): presumably hides a same-named Layer member -- confirm.
size_t getSize();
protected:
// Channel count read from the pool configuration.
int channels_;
// Pooling window extents: X = width, Y = height, Z = depth.
int sizeX_, sizeY_, sizeZ_;
// Per-axis stride, padding, input image size and output size
// (W = width, H = height, D = depth).
int strideW_, strideH_, strideD_;
int paddingW_, paddingH_, paddingD_;
int imgSizeW_, imgSizeH_, imgSizeD_;
int outputW_, outputH_, outputD_;
// Pool type string from the configuration.
std::string poolType_;
// Scratch matrix passed to the max-pooling matrix routines.
MatrixPtr maxPoolIdx_;
};
} // namespace paddle
...@@ -1246,6 +1246,75 @@ TEST(Layer, PoolLayer) { ...@@ -1246,6 +1246,75 @@ TEST(Layer, PoolLayer) {
#endif #endif
} }
/**
 * Fills `config` and `pool` for a pool3d layer test: 16 channels/filters,
 * a 3x3x3 window, stride 2 and zero padding on every axis. The output
 * extents are derived from the image sizes already stored in `pool`, so the
 * caller must set img_size / img_size_y / img_size_z beforehand.
 *
 * @param config    test configuration to fill (layer type, filters, bias).
 * @param pool      pooling configuration to fill.
 * @param poolType  pool type to test; it is stored as given (an earlier
 *                  unconditional override to "avg" meant other types were
 *                  never exercised).
 */
void setPool3DConfig(TestConfig* config,
                     PoolConfig* pool,
                     const string& poolType) {
  // filter size
  const int NUM_FILTERS = 16;
  const int FILTER_SIZE = 3;
  const int FILTER_SIZE_Y = 3;
  const int FILTER_SIZE_Z = 3;
  const int CHANNELS = 16;

  config->biasSize = 0;
  config->layerConfig.set_type("pool3d");
  config->layerConfig.set_num_filters(NUM_FILTERS);

  int kw = FILTER_SIZE, kh = FILTER_SIZE_Y, kd = FILTER_SIZE_Z;
  int pw = 0, ph = 0, pd = 0;
  int sw = 2, sh = 2, sd = 2;

  pool->set_pool_type(poolType);
  pool->set_channels(CHANNELS);
  pool->set_size_x(kw);
  pool->set_size_y(kh);
  pool->set_size_z(kd);
  // Use the same padding values that feed the output-size computation below.
  pool->set_padding(pw);
  pool->set_padding_y(ph);
  pool->set_padding_z(pd);
  pool->set_stride(sw);
  pool->set_stride_y(sh);
  pool->set_stride_z(sd);
  pool->set_start(0);

  int ow = outputSize(pool->img_size(), kw, pw, sw, /* caffeMode */ false);
  int oh = outputSize(pool->img_size_y(), kh, ph, sh, /* caffeMode */ false);
  int od = outputSize(pool->img_size_z(), kd, pd, sd, /* caffeMode */ false);
  pool->set_output_x(ow);
  pool->set_output_y(oh);
  pool->set_output_z(od);
}
/**
 * Builds a pool3d layer configuration over a 9x9x9 input volume and runs
 * testLayerGrad on it with batch size 100.
 *
 * The configured layer size is channels * output_z * output_y * output_x;
 * the depth factor used to be missing.
 */
void testPool3DLayer(const string& poolType, bool trans, bool useGpu) {
  TestConfig config;
  // 11664 = 16 channels * 9 * 9 * 9 input elements per sample.
  config.inputDefs.push_back({INPUT_DATA, "layer_0", 11664, 0});
  LayerInputConfig* input = config.layerConfig.add_inputs();
  PoolConfig* pool = input->mutable_pool_conf();

  const int IMAGE_SIZE = 9;
  const int IMAGE_SIZE_Y = 9;
  const int IMAGE_SIZE_Z = 9;

  // Image sizes must be set first: setPool3DConfig derives the output
  // extents from them.
  pool->set_img_size(IMAGE_SIZE);
  pool->set_img_size_y(IMAGE_SIZE_Y);
  pool->set_img_size_z(IMAGE_SIZE_Z);

  setPool3DConfig(&config, pool, poolType);
  config.layerConfig.set_size(pool->output_x() * pool->output_y() *
                              pool->output_z() * pool->channels());
  testLayerGrad(config, "pool3d", 100, trans, useGpu);
}
// Runs testPool3DLayer for the "avg" and "max" pool types on the CPU, and
// additionally with useGpu=true unless PADDLE_ONLY_CPU is defined.
TEST(Layer, Pool3DLayer) {
testPool3DLayer("avg", /* trans= */ false, /* useGpu= */ false);
testPool3DLayer("max", /* trans= */ false, /* useGpu= */ false);
#ifndef PADDLE_ONLY_CPU
testPool3DLayer("avg", /* trans= */ false, /* useGpu= */ true);
testPool3DLayer("max", /* trans= */ false, /* useGpu= */ true);
#endif
}
void testSppLayer(const string& poolType, void testSppLayer(const string& poolType,
const int pyramidHeight, const int pyramidHeight,
bool trans, bool trans,
......
此差异已折叠。
...@@ -928,15 +928,102 @@ public: ...@@ -928,15 +928,102 @@ public:
size_t paddingW) { size_t paddingW) {
LOG(FATAL) << "Not implemeted"; LOG(FATAL) << "Not implemeted";
} }
/** /**
* Input: one or more sequences. Each sequence contains some instances. * Pooling 3D forward operation, pick out the largest element
* * in the sizeX of value
* Output: output size is the number of input sequences (NOT input
* instances).
*
* output[i] is set to max_input[i].
*/ */
// Default implementation: logs a fatal "not implemented" error.
// NOTE(review): parameter meanings are inferred from the names (D/H/W and
// Z/Y/X presumably denote depth/height/width) -- confirm in the overrides.
virtual void maxPool3DForward(Matrix& inputMat,
Matrix& maxPoolIdx,
size_t channels,
size_t imgSizeD,
size_t imgSizeH,
size_t imgSizeW,
size_t outputD,
size_t outputH,
size_t outputW,
size_t sizeZ,
size_t sizeY,
size_t sizeX,
size_t strideD,
size_t strideH,
size_t strideW,
size_t paddingD,
size_t paddingH,
size_t paddingW) {
LOG(FATAL) << "Not implemeted";
}
/**
 * 3D max-pooling backward operation.
 *
 * Default implementation: logs a fatal "not implemented" error.
 * NOTE(review): parameter meanings are inferred from the names (D/H/W and
 * Z/Y/X presumably denote depth/height/width; scaleTargets / scaleOutput
 * are scale factors whose roles are not visible here) -- confirm in the
 * overrides.
 */
virtual void maxPool3DBackward(Matrix& outGrad,
Matrix& maxPoolIdx,
size_t imgSizeD,
size_t imgSizeH,
size_t imgSizeW,
size_t outputD,
size_t outputH,
size_t outputW,
size_t sizeZ,
size_t sizeY,
size_t sizeX,
size_t strideD,
size_t strideH,
size_t strideW,
size_t paddingD,
size_t paddingH,
size_t paddingW,
real scaleTargets,
real scaleOutput) {
LOG(FATAL) << "Not implemeted";
}
/**
 * 3D average-pooling forward operation.
 *
 * Default implementation: logs a fatal "not implemented" error.
 * NOTE(review): parameter meanings are inferred from the names (D/H/W and
 * Z/Y/X presumably denote depth/height/width) -- confirm in the overrides.
 */
virtual void avgPool3DForward(Matrix& input,
size_t channels,
size_t imgSizeD,
size_t imgSizeH,
size_t imgSizeW,
size_t outputD,
size_t outputH,
size_t outputW,
size_t sizeZ,
size_t sizeY,
size_t sizeX,
size_t strideD,
size_t strideH,
size_t strideW,
size_t paddingD,
size_t paddingH,
size_t paddingW) {
LOG(FATAL) << "Not implemeted";
}
/**
 * 3D average-pooling backward operation.
 *
 * Default implementation: logs a fatal "not implemented" error.
 * NOTE(review): parameter meanings are inferred from the names; the first
 * parameter is named `input` but callers in this change pass the output
 * gradient -- confirm in the overrides.
 */
virtual void avgPool3DBackward(Matrix& input,
size_t imgSizeD,
size_t imgSizeH,
size_t imgSizeW,
size_t outputD,
size_t outputH,
size_t outputW,
size_t sizeZ,
size_t sizeY,
size_t sizeX,
size_t strideD,
size_t strideH,
size_t strideW,
size_t paddingD,
size_t paddingH,
size_t paddingW,
real scaleTargets,
real scaleOutput) {
LOG(FATAL) << "Not implemeted";
}
/**
* Input: one or more sequences. Each sequence contains some instances.
*
* Output: output size is the number of input sequences (NOT input
* instances).
*
* output[i] is set to max_input[i].
*/
virtual void maxSequenceForward(Matrix& input, virtual void maxSequenceForward(Matrix& input,
const IVector& sequence, const IVector& sequence,
IVector& index) { IVector& index) {
...@@ -1384,6 +1471,82 @@ public: ...@@ -1384,6 +1471,82 @@ public:
size_t paddingH, size_t paddingH,
size_t paddingW); size_t paddingW);
// 3D max-pooling forward; declared here, defined out of line. The parameter
// list matches the virtual default of the same name.
void maxPool3DForward(Matrix& inputMat,
Matrix& maxPoolIdx,
size_t channels,
size_t imgSizeD,
size_t imgSizeH,
size_t imgSizeW,
size_t outputD,
size_t outputH,
size_t outputW,
size_t sizeZ,
size_t sizeY,
size_t sizeX,
size_t strideD,
size_t strideH,
size_t strideW,
size_t paddingD,
size_t paddingH,
size_t paddingW);
// 3D max-pooling backward; declared here, defined out of line. The parameter
// list matches the virtual default of the same name.
void maxPool3DBackward(Matrix& outGrad,
Matrix& maxPoolIdx,
size_t imgSizeD,
size_t imgSizeH,
size_t imgSizeW,
size_t outputD,
size_t outputH,
size_t outputW,
size_t sizeZ,
size_t sizeY,
size_t sizeX,
size_t strideD,
size_t strideH,
size_t strideW,
size_t paddingD,
size_t paddingH,
size_t paddingW,
real scaleTargets,
real scaleOutput);
// 3D average-pooling forward; declared here, defined out of line. The
// parameter list matches the virtual default of the same name.
void avgPool3DForward(Matrix& input,
size_t channels,
size_t imgSizeD,
size_t imgSizeH,
size_t imgSizeW,
size_t outputD,
size_t outputH,
size_t outputW,
size_t sizeZ,
size_t sizeY,
size_t sizeX,
size_t strideD,
size_t strideH,
size_t strideW,
size_t paddingD,
size_t paddingH,
size_t paddingW);
// 3D average-pooling backward; declared here, defined out of line. The
// parameter list matches the virtual default of the same name.
void avgPool3DBackward(Matrix& input,
size_t imgSizeD,
size_t imgSizeH,
size_t imgSizeW,
size_t outputD,
size_t outputH,
size_t outputW,
size_t sizeZ,
size_t sizeY,
size_t sizeX,
size_t strideD,
size_t strideH,
size_t strideW,
size_t paddingD,
size_t paddingH,
size_t paddingW,
real scaleTargets,
real scaleOutput);
void maxSequenceForward(Matrix& input, void maxSequenceForward(Matrix& input,
const IVector& sequence, const IVector& sequence,
IVector& index); IVector& index);
...@@ -1575,6 +1738,82 @@ public: ...@@ -1575,6 +1738,82 @@ public:
size_t paddingH, size_t paddingH,
size_t paddingW); size_t paddingW);
// 3D max-pooling forward; declared here, defined out of line. The parameter
// list matches the virtual default of the same name.
void maxPool3DForward(Matrix& inputMat,
Matrix& maxPoolIdx,
size_t channels,
size_t imgSizeD,
size_t imgSizeH,
size_t imgSizeW,
size_t outputD,
size_t outputH,
size_t outputW,
size_t sizeZ,
size_t sizeY,
size_t sizeX,
size_t strideD,
size_t strideH,
size_t strideW,
size_t paddingD,
size_t paddingH,
size_t paddingW);
// 3D max-pooling backward; declared here, defined out of line. The parameter
// list matches the virtual default of the same name.
void maxPool3DBackward(Matrix& outGrad,
Matrix& maxPoolIdx,
size_t imgSizeD,
size_t imgSizeH,
size_t imgSizeW,
size_t outputD,
size_t outputH,
size_t outputW,
size_t sizeZ,
size_t sizeY,
size_t sizeX,
size_t strideD,
size_t strideH,
size_t strideW,
size_t paddingD,
size_t paddingH,
size_t paddingW,
real scaleTargets,
real scaleOutput);
// 3D average-pooling forward; declared here, defined out of line. The
// parameter list matches the virtual default of the same name.
void avgPool3DForward(Matrix& input,
size_t channels,
size_t imgSizeD,
size_t imgSizeH,
size_t imgSizeW,
size_t outputD,
size_t outputH,
size_t outputW,
size_t sizeZ,
size_t sizeY,
size_t sizeX,
size_t strideD,
size_t strideH,
size_t strideW,
size_t paddingD,
size_t paddingH,
size_t paddingW);
// 3D average-pooling backward; declared here, defined out of line. The
// parameter list matches the virtual default of the same name.
void avgPool3DBackward(Matrix& input,
size_t imgSizeD,
size_t imgSizeH,
size_t imgSizeW,
size_t outputD,
size_t outputH,
size_t outputW,
size_t sizeZ,
size_t sizeY,
size_t sizeX,
size_t strideD,
size_t strideH,
size_t strideW,
size_t paddingD,
size_t paddingH,
size_t paddingW,
real scaleTargets,
real scaleOutput);
void maxSequenceForward(Matrix& input, void maxSequenceForward(Matrix& input,
const IVector& sequence, const IVector& sequence,
IVector& index); IVector& index);
......
...@@ -1204,6 +1204,399 @@ TEST(Matrix, warpCTC) { ...@@ -1204,6 +1204,399 @@ TEST(Matrix, warpCTC) {
} }
} }
void testMaxPool3DFwdBwd(int numSamples,
int channels,
int imgSizeD,
int imgSizeH,
int imgSizeW,
int ksizeD,
int ksizeH,
int ksizeW,
int strideD,
int strideH,
int strideW,
int padD,
int padH,
int padW) {
int outD = outputSize(imgSizeD, ksizeD, padD, strideD, true);
int outH = outputSize(imgSizeH, ksizeH, padH, strideH, true);
int outW = outputSize(imgSizeW, ksizeW, padW, strideW, true);
int inWidth = channels * imgSizeD * imgSizeH * imgSizeW;
MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false);
MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true);
int outWidth = channels * outD * outH * outW;
MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true);
MatrixPtr maxIdx = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr maxIdxGpu = GpuMatrix::create(numSamples, outWidth, false, true);
input->randomizeUniform();
target->randomizeUniform();
inputGpu->copyFrom(*input);
targetGpu->copyFrom(*target);
target->maxPool3DForward(*input,
*maxIdx,
channels,
imgSizeD,
imgSizeH,
imgSizeW,
outD,
outH,
outW,
ksizeD,
ksizeH,
ksizeW,
strideD,
strideH,
strideW,
padD,
padH,
padW);
targetGpu->maxPool3DForward(*inputGpu,
*maxIdxGpu,
channels,
imgSizeD,
imgSizeH,
imgSizeW,
outD,
outH,
outW,
ksizeD,
ksizeH,
ksizeW,
strideD,
strideH,
strideW,
padD,
padH,
padW);
MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false);
targetCheck->copyFrom(*targetGpu);
checkMatrixEqual(target, targetCheck);
MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false);
MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true);
MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpuGrad =
GpuMatrix::create(numSamples, outWidth, false, true);
inputGrad->randomizeUniform();
targetGrad->randomizeUniform();
inputGpuGrad->copyFrom(*inputGrad);
targetGpuGrad->copyFrom(*targetGrad);
inputGrad->maxPool3DBackward(*targetGrad,
*maxIdx,
imgSizeD,
imgSizeH,
imgSizeW,
outD,
outH,
outW,
ksizeD,
ksizeH,
ksizeW,
strideD,
strideH,
strideW,
padD,
padH,
padW,
1.0,
1.0);
inputGpuGrad->maxPool3DBackward(*targetGpuGrad,
*maxIdxGpu,
imgSizeD,
imgSizeH,
imgSizeW,
outD,
outH,
outW,
ksizeD,
ksizeH,
ksizeW,
strideD,
strideH,
strideW,
padD,
padH,
padW,
1.0,
1.0);
MatrixPtr targetBwdCheck =
CpuMatrix::create(numSamples, inWidth, false, false);
targetBwdCheck->copyFrom(*inputGpuGrad);
checkMatrixEqual(inputGrad, targetBwdCheck);
}
void testAvgPool3DFwdBwd(int numSamples,
int channels,
int imgSizeD,
int imgSizeH,
int imgSizeW,
int ksizeD,
int ksizeH,
int ksizeW,
int strideD,
int strideH,
int strideW,
int padD,
int padH,
int padW) {
int outD = outputSize(imgSizeD, ksizeD, padD, strideD, true);
int outH = outputSize(imgSizeH, ksizeH, padH, strideH, true);
int outW = outputSize(imgSizeW, ksizeW, padW, strideW, true);
int inWidth = imgSizeD * imgSizeH * imgSizeW * channels;
MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false);
MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true);
int outWidth = channels * outD * outH * outW;
MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true);
input->randomizeUniform();
target->randomizeUniform();
inputGpu->copyFrom(*input);
targetGpu->copyFrom(*target);
target->avgPool3DForward(*input,
channels,
imgSizeD,
imgSizeH,
imgSizeW,
outD,
outH,
outW,
ksizeD,
ksizeH,
ksizeW,
strideD,
strideH,
strideW,
padD,
padH,
padW);
targetGpu->avgPool3DForward(*inputGpu,
channels,
imgSizeD,
imgSizeH,
imgSizeW,
outD,
outH,
outW,
ksizeD,
ksizeH,
ksizeW,
strideD,
strideH,
strideW,
padD,
padH,
padW);
TensorCheckErr(*target, *targetGpu);
MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false);
MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true);
MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpuGrad =
GpuMatrix::create(numSamples, outWidth, false, true);
inputGrad->randomizeUniform();
targetGrad->randomizeUniform();
inputGpuGrad->copyFrom(*inputGrad);
targetGpuGrad->copyFrom(*targetGrad);
inputGrad->avgPool3DBackward(*targetGrad,
imgSizeD,
imgSizeH,
imgSizeW,
outD,
outH,
outW,
ksizeD,
ksizeH,
ksizeW,
strideD,
strideH,
strideW,
padD,
padH,
padW,
1.0,
1.0);
inputGpuGrad->avgPool3DBackward(*targetGpuGrad,
imgSizeD,
imgSizeH,
imgSizeW,
outD,
outH,
outW,
ksizeD,
ksizeH,
ksizeW,
strideD,
strideH,
strideW,
padD,
padH,
padW,
1.0,
1.0);
TensorCheckErr(*inputGrad, *inputGpuGrad);
}
// TODO(yi): I noticed many such blindly combinatorial tests in this
// file. They are no help to locate defects at all.
// Compares CPU and GPU 3D max/avg pooling over a small parameter grid.
//
// The kernel extents are handed to the helpers in depth/height/width order
// (sizeZ, sizeY, sizeX), matching their (ksizeD, ksizeH, ksizeW) parameters
// and the way the paddings are derived (pD from sizeZ, pW from sizeX). They
// were previously passed as X, Y, Z, which only went unnoticed because all
// three sizes are equal in this grid.
//
// A wider grid (channels {1, 3}, kernel sizes {2, 3}, strides {1, 2}) used to
// be kept here as commented-out code; widen the initializer lists below to
// re-enable it.
TEST(Matrix, Pool3DFwdBwd) {
  for (auto numSamples : {1, 3}) {
    for (auto channels : {3}) {
      for (auto imgSizeD : {9, 16}) {
        for (auto imgSizeH : {9, 32}) {
          for (auto imgSizeW : {9, 32}) {
            for (auto sizeX : {3}) {
              for (auto sizeY : {3}) {
                for (auto sizeZ : {3}) {
                  for (auto sD : {2}) {
                    for (auto sH : {2}) {
                      for (auto sW : {2}) {
                        for (auto pD : {0, (sizeZ - 1) / 2}) {
                          for (auto pH : {0, (sizeY - 1) / 2}) {
                            for (auto pW : {0, (sizeX - 1) / 2}) {
                              VLOG(3) << " numSamples=" << numSamples
                                      << " channels=" << channels
                                      << " imgSizeD=" << imgSizeD
                                      << " imgSizeH=" << imgSizeH
                                      << " imgSizeW=" << imgSizeW
                                      << " sizeX=" << sizeX
                                      << " sizeY=" << sizeY
                                      << " sizeZ=" << sizeZ << " strideD=" << sD
                                      << " strideH=" << sH << " strideW=" << sW
                                      << " padingD=" << pD << " padingH=" << pH
                                      << " padingW=" << pW;

                              testMaxPool3DFwdBwd(numSamples,
                                                  channels,
                                                  imgSizeD,
                                                  imgSizeH,
                                                  imgSizeW,
                                                  sizeZ,
                                                  sizeY,
                                                  sizeX,
                                                  sD,
                                                  sH,
                                                  sW,
                                                  pD,
                                                  pH,
                                                  pW);
                              testAvgPool3DFwdBwd(numSamples,
                                                  channels,
                                                  imgSizeD,
                                                  imgSizeH,
                                                  imgSizeW,
                                                  sizeZ,
                                                  sizeY,
                                                  sizeX,
                                                  sD,
                                                  sH,
                                                  sW,
                                                  pD,
                                                  pH,
                                                  pW);
                            }
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}
void testMatrixCol2Vol(int depth, int height, int width) { void testMatrixCol2Vol(int depth, int height, int width) {
int channel = 3; int channel = 3;
int filterX = 3, filterY = 4, filterZ = 5; int filterX = 3, filterY = 4, filterZ = 5;
...@@ -1303,6 +1696,5 @@ TEST(Matrix, col2Vol) { ...@@ -1303,6 +1696,5 @@ TEST(Matrix, col2Vol) {
} }
} }
} }
///////
#endif #endif
...@@ -133,6 +133,12 @@ message PoolConfig { ...@@ -133,6 +133,12 @@ message PoolConfig {
// if not set, use padding // if not set, use padding
optional uint32 padding_y = 13; optional uint32 padding_y = 13;
// Depth-axis ("z") counterparts of the pooling fields; each defaults to 1.
optional uint32 size_z = 14 [ default = 1 ];
optional uint32 stride_z = 15 [ default = 1 ];
optional uint32 output_z = 16 [ default = 1 ];
optional uint32 img_size_z = 17 [ default = 1 ];
// NOTE(review): the Python config helper describes padding as "0 by default
// in protobuf"; confirm that a default of 1 is intended for padding_z.
optional uint32 padding_z = 18 [ default = 1 ];
} }
message SppConfig { message SppConfig {
......
...@@ -938,6 +938,31 @@ class Pool(Cfg): ...@@ -938,6 +938,31 @@ class Pool(Cfg):
self.add_keys(locals()) self.add_keys(locals())
@config_class
class Pool3d(Cfg):
    # User-facing description of a 3D pooling input. Unset y/z values
    # (None or any other falsy value) fall back to the x / common value.
    def __init__(
            self,
            pool_type,
            channels,
            size_x,
            size_y=None,
            size_z=None,
            start=None,
            stride=None,  # 1 by default in protobuf
            stride_y=None,
            stride_z=None,
            padding=None,  # 0 by default in protobuf
            padding_y=None,
            padding_z=None):
        # Keep this first: it is handed locals(), so any local name bound
        # before this line would be passed along as well.
        self.add_keys(locals())

        # Window sizes along y and z default to the x size.
        self.filter_size_y = size_y or size_x
        self.filter_size_z = size_z or size_x

        # Paddings along y and z default to the common padding.
        self.padding_y = padding_y or padding
        self.padding_z = padding_z or padding

        # Strides along y and z default to the common stride.
        self.stride_y = stride_y or stride
        self.stride_z = stride_z or stride
@config_class @config_class
class SpatialPyramidPool(Cfg): class SpatialPyramidPool(Cfg):
def __init__(self, pool_type, pyramid_height, channels): def __init__(self, pool_type, pyramid_height, channels):
...@@ -1253,6 +1278,45 @@ def parse_pool(pool, input_layer_name, pool_conf, ceil_mode): ...@@ -1253,6 +1278,45 @@ def parse_pool(pool, input_layer_name, pool_conf, ceil_mode):
pool_conf.stride_y, not ceil_mode) pool_conf.stride_y, not ceil_mode)
def parse_pool3d(pool, input_layer_name, pool_conf, ceil_mode):
    """Fill ``pool_conf`` for a 3D pooling input.

    Copies the pool type, channel count, window sizes, strides and paddings
    from ``pool`` (y/z values fall back to the x / common value through
    ``default``), looks up the input image size with ``get_img3d_size`` and
    computes the output size of every axis with ``cnn_output_size``.

    :param pool: user-facing pooling description (reads pool_type, channels,
                 size_x/y/z, stride/_y/_z, padding/_y/_z and start).
    :param input_layer_name: name of the layer feeding this pooling input.
    :param pool_conf: configuration object that is filled in place.
    :param ceil_mode: its negation is passed as the last argument of
                      ``cnn_output_size`` for every axis.
    """
    pool_conf.pool_type = pool.pool_type
    # NOTE(review): only the '-projection' names are accepted here, while the
    # C++ Pool3DLayer added in the same change dispatches on "avg" / "max".
    # Confirm the names agree end to end.
    config_assert(pool.pool_type in ['max-projection', 'avg-projection'],
                  "pool-type %s is not in "
                  "['max-projection', 'avg-projection']" % pool.pool_type)

    pool_conf.channels = pool.channels

    pool_conf.size_x = pool.size_x
    pool_conf.stride = pool.stride
    pool_conf.padding = pool.padding

    pool_conf.size_y = default(pool.size_y, pool_conf.size_x)
    pool_conf.size_z = default(pool.size_z, pool_conf.size_x)
    pool_conf.stride_y = default(pool.stride_y, pool_conf.stride)
    pool_conf.stride_z = default(pool.stride_z, pool_conf.stride)
    # The paddings are assigned exactly once; a second, identical assignment
    # guarded by "pool.padding is not None" used to follow and was redundant.
    pool_conf.padding_y = default(pool.padding_y, pool_conf.padding)
    pool_conf.padding_z = default(pool.padding_z, pool_conf.padding)

    pool_conf.img_size, pool_conf.img_size_y, pool_conf.img_size_z = \
        get_img3d_size(input_layer_name, pool.channels)

    config_assert(not pool.start, "start is deprecated in pooling.")

    pool_conf.output_x = cnn_output_size(pool_conf.img_size, pool_conf.size_x,
                                         pool_conf.padding, pool_conf.stride,
                                         not ceil_mode)
    pool_conf.output_y = cnn_output_size(pool_conf.img_size_y, pool_conf.size_y,
                                         pool_conf.padding_y,
                                         pool_conf.stride_y, not ceil_mode)
    pool_conf.output_z = cnn_output_size(pool_conf.img_size_z, pool_conf.size_z,
                                         pool_conf.padding_z,
                                         pool_conf.stride_z, not ceil_mode)
def parse_spp(spp, input_layer_name, spp_conf): def parse_spp(spp, input_layer_name, spp_conf):
parse_image(spp, input_layer_name, spp_conf.image_conf) parse_image(spp, input_layer_name, spp_conf.image_conf)
spp_conf.pool_type = spp.pool_type spp_conf.pool_type = spp.pool_type
...@@ -1897,9 +1961,9 @@ class DataLayer(LayerBase): ...@@ -1897,9 +1961,9 @@ class DataLayer(LayerBase):
def __init__(self, def __init__(self,
name, name,
size, size,
depth=None,
height=None, height=None,
width=None, width=None,
depth=None,
device=None): device=None):
super(DataLayer, self).__init__( super(DataLayer, self).__init__(
name, 'data', size, inputs=[], device=device) name, 'data', size, inputs=[], device=device)
...@@ -2215,6 +2279,35 @@ class PoolLayer(LayerBase): ...@@ -2215,6 +2279,35 @@ class PoolLayer(LayerBase):
pool_conf.channels) pool_conf.channels)
@config_layer('pool3d')
class Pool3DLayer(LayerBase):
    # Config-side description of the 'pool3d' layer: parses the pooling
    # settings of every input and derives the layer size from the result.
    def __init__(self, name, inputs, ceil_mode=True, **xargs):
        super(Pool3DLayer, self).__init__(
            name, 'pool3d', 0, inputs=inputs, **xargs)
        for idx in xrange(len(self.inputs)):
            source_layer = self.get_input_layer(idx)
            conf = self.config.inputs[idx].pool_conf
            parse_pool3d(self.inputs[idx].pool, source_layer.name, conf,
                         ceil_mode)
            # The layer geometry is (re)set once per input.
            self.set_cnn_layer(name, conf.output_z, conf.output_y,
                               conf.output_x, conf.channels)

    def set_cnn_layer(self,
                      input_layer_name,
                      depth,
                      height,
                      width,
                      channels,
                      is_print=True):
        # Records size = depth * height * width * channels together with
        # the height/width and depth of the layer, optionally printing them.
        size = depth * height * width * channels
        self.set_layer_size(size)
        self.set_layer_height_width(height, width)
        self.set_layer_depth(depth)
        if not is_print:
            return
        print("output for %s: c = %d, d = %d, h = %d, w = %d, size = %d" %
              (input_layer_name, channels, depth, height, width, size))
@config_layer('spp') @config_layer('spp')
class SpatialPyramidPoolLayer(LayerBase): class SpatialPyramidPoolLayer(LayerBase):
def __init__(self, name, inputs, **xargs): def __init__(self, name, inputs, **xargs):
......
...@@ -138,6 +138,7 @@ __all__ = [ ...@@ -138,6 +138,7 @@ __all__ = [
'slice_projection', 'slice_projection',
'seq_slice_layer', 'seq_slice_layer',
'kmax_sequence_score_layer', 'kmax_sequence_score_layer',
'img_pool3d_layer',
'scale_shift_layer', 'scale_shift_layer',
'img_conv3d_layer', 'img_conv3d_layer',
] ]
...@@ -168,6 +169,7 @@ class LayerType(object): ...@@ -168,6 +169,7 @@ class LayerType(object):
EXCONVTRANS_LAYER = 'exconvt' EXCONVTRANS_LAYER = 'exconvt'
CUDNNCONV_LAYER = 'cudnn_conv' CUDNNCONV_LAYER = 'cudnn_conv'
POOL_LAYER = 'pool' POOL_LAYER = 'pool'
POOL3D_LAYER = 'pool3d'
BATCH_NORM_LAYER = 'batch_norm' BATCH_NORM_LAYER = 'batch_norm'
NORM_LAYER = 'norm' NORM_LAYER = 'norm'
SUM_TO_ONE_NORM_LAYER = 'sum_to_one_norm' SUM_TO_ONE_NORM_LAYER = 'sum_to_one_norm'
...@@ -900,7 +902,7 @@ def mixed_layer(size=0, ...@@ -900,7 +902,7 @@ def mixed_layer(size=0,
@layer_support() @layer_support()
def data_layer(name, size, height=None, width=None, depth=None, def data_layer(name, size, depth=None, height=None, width=None,
layer_attr=None): layer_attr=None):
""" """
Define DataLayer For NeuralNetwork. Define DataLayer For NeuralNetwork.
...@@ -938,8 +940,8 @@ def data_layer(name, size, height=None, width=None, depth=None, ...@@ -938,8 +940,8 @@ def data_layer(name, size, height=None, width=None, depth=None,
num_filters = None num_filters = None
if height is not None and width is not None: if height is not None and width is not None:
num_filters = size / (width * height * depth) num_filters = size / (width * height * depth)
assert num_filters * width * height*depth == size, \ assert num_filters * width * height * depth == size, \
"size=%s width=%s height=%s depth=%s" % (size, width, height, depth) "size=%s width=%s height=%s depth=%s" % (size, width, height, depth)
return LayerOutput(name, LayerType.DATA, size=size, num_filters=num_filters) return LayerOutput(name, LayerType.DATA, size=size, num_filters=num_filters)
...@@ -2663,6 +2665,146 @@ def img_pool_layer(input, ...@@ -2663,6 +2665,146 @@ def img_pool_layer(input,
size=l.config.size) size=l.config.size)
@wrap_name_default("pool3d")
@layer_support()
def img_pool3d_layer(input,
                     pool_size,
                     name=None,
                     num_channels=None,
                     pool_type=None,
                     stride=1,
                     padding=0,
                     layer_attr=None,
                     pool_size_y=None,
                     stride_y=None,
                     padding_y=None,
                     pool_size_z=None,
                     stride_z=None,
                     padding_z=None,
                     ceil_mode=True):
    """
    3D image pooling layer.

    The details of pooling layer, please refer ufldl's pooling_ .

    .. _pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/

    - ceil_mode=True:

    ..  math::

        w = 1 + int(ceil((input\_width + 2 * padding - pool\_size) / float(stride)))
        h = 1 + int(ceil((input\_height + 2 * padding\_y - pool\_size\_y) / float(stride\_y)))
        d = 1 + int(ceil((input\_depth + 2 * padding\_z - pool\_size\_z) / float(stride\_z)))

    - ceil_mode=False:

    ..  math::

        w = 1 + int(floor((input\_width + 2 * padding - pool\_size) / float(stride)))
        h = 1 + int(floor((input\_height + 2 * padding\_y - pool\_size\_y) / float(stride\_y)))
        d = 1 + int(floor((input\_depth + 2 * padding\_z - pool\_size\_z) / float(stride\_z)))

    The example usage is:

    ..  code-block:: python

        maxpool = img_pool3d_layer(input=conv,
                                   pool_size=3,
                                   num_channels=8,
                                   stride=1,
                                   padding=1,
                                   pool_type=MaxPooling())

    For pool_size, stride and padding, a 3-element tuple/list is unpacked in
    the order (x, y, z), i.e. (width, height, depth), and takes precedence over
    the matching ``*_y`` / ``*_z`` arguments. When a scalar is given, it is
    used for every axis whose ``*_y`` / ``*_z`` argument is left as None.

    :param padding: pooling padding width, or (x, y, z) paddings.
    :type padding: int|tuple|list
    :param name: name of pooling layer
    :type name: basestring.
    :param input: layer's input
    :type input: LayerOutput
    :param pool_size: pooling window width, or (x, y, z) window sizes.
    :type pool_size: int|tuple|list
    :param num_channels: number of input channel. Defaults to
                         input.num_filters.
    :type num_channels: int
    :param pool_type: pooling type. MaxPooling or AvgPooling. Default is
                      MaxPooling.
    :type pool_type: BasePoolingType
    :param stride: stride width of pooling, or (x, y, z) strides.
    :type stride: int|tuple|list
    :param layer_attr: Extra Layer attribute.
    :type layer_attr: ExtraLayerAttribute
    :param pool_size_y: pooling window height. Defaults to pool_size.
    :type pool_size_y: int|None
    :param stride_y: stride height of pooling. Defaults to stride.
    :type stride_y: int|None
    :param padding_y: pooling padding height. Defaults to padding.
    :type padding_y: int|None
    :param pool_size_z: pooling window depth. Defaults to pool_size.
    :type pool_size_z: int|None
    :param stride_z: stride depth of pooling. Defaults to stride.
    :type stride_z: int|None
    :param padding_z: pooling padding depth. Defaults to padding.
    :type padding_z: int|None
    :param ceil_mode: Whether to use ceil mode to calculate the output depth,
                      height and width. Default is True. If set to False,
                      floor is used instead.
    :type ceil_mode: bool
    :return: LayerOutput object.
    :rtype: LayerOutput
    """
    if num_channels is None:
        assert input.num_filters is not None
        num_channels = input.num_filters

    if pool_type is None:
        pool_type = MaxPooling()
    elif isinstance(pool_type, AvgPooling):
        pool_type.name = 'avg'

    # Max/avg pooling are registered under a '-projection' suffixed type name;
    # any other pooling type is passed through by its own name.
    if isinstance(pool_type, (AvgPooling, MaxPooling)):
        type_name = pool_type.name + '-projection'
    else:
        type_name = pool_type.name

    if isinstance(pool_size, collections.Sequence):
        assert len(pool_size) == 3
        pool_size, pool_size_y, pool_size_z = pool_size
    else:
        # Only fall back to the scalar when the per-axis value was not given,
        # so explicit pool_size_y / pool_size_z arguments are honoured.
        if pool_size_y is None:
            pool_size_y = pool_size
        if pool_size_z is None:
            pool_size_z = pool_size

    if isinstance(stride, collections.Sequence):
        assert len(stride) == 3
        stride, stride_y, stride_z = stride
    else:
        if stride_y is None:
            stride_y = stride
        if stride_z is None:
            stride_z = stride

    if isinstance(padding, collections.Sequence):
        assert len(padding) == 3
        # The third element is the depth padding (it used to be unpacked into
        # padding_y a second time, leaving padding_z unset).
        padding, padding_y, padding_z = padding
    else:
        if padding_y is None:
            padding_y = padding
        if padding_z is None:
            padding_z = padding

    l = Layer(
        name=name,
        type=LayerType.POOL3D_LAYER,
        inputs=[
            Input(
                input.name,
                pool=Pool3d(
                    pool_type=type_name,
                    channels=num_channels,
                    size_x=pool_size,
                    start=None,
                    stride=stride,
                    padding=padding,
                    size_y=pool_size_y,
                    stride_y=stride_y,
                    padding_y=padding_y,
                    size_z=pool_size_z,
                    stride_z=stride_z,
                    padding_z=padding_z))
        ],
        ceil_mode=ceil_mode,
        **ExtraLayerAttribute.to_kwargs(layer_attr))
    # NOTE(review): the returned LayerOutput is tagged POOL_LAYER although the
    # layer itself is configured as POOL3D_LAYER; kept as-is for compatibility
    # -- confirm whether downstream code relies on this.
    return LayerOutput(
        name,
        LayerType.POOL_LAYER,
        parents=[input],
        num_filters=num_channels,
        size=l.config.size)
@wrap_name_default("spp") @wrap_name_default("spp")
@layer_support() @layer_support()
def spp_layer(input, def spp_layer(input,
......
...@@ -9,6 +9,7 @@ test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer ...@@ -9,6 +9,7 @@ test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer
test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer
test_kmax_seq_socre_layer test_seq_select_layers test_scale_shift_layer test_kmax_seq_socre_layer test_seq_select_layers test_scale_shift_layer
test_seq_slice_layer test_cross_entropy_over_beam test_conv3d_layer test_deconv3d_layer) test_seq_slice_layer test_cross_entropy_over_beam test_pooling3D_layer
test_conv3d_layer test_deconv3d_layer)
export whole_configs=(test_split_datasource) export whole_configs=(test_split_datasource)
type: "nn"
layers {
name: "data_2d"
type: "data"
size: 6000
active_type: ""
height: 20
width: 10
}
layers {
name: "pool___2d"
type: "pool"
size: 840
active_type: ""
inputs {
input_layer_name: "data_2d"
pool_conf {
pool_type: "avg-projection"
channels: 30
size_x: 5
stride: 3
output_x: 4
img_size: 10
padding: 1
size_y: 5
stride_y: 3
output_y: 7
img_size_y: 20
padding_y: 1
}
}
height: 7
width: 4
}
layers {
name: "data_3d_1"
type: "data"
size: 60000
active_type: ""
height: 20
width: 10
depth: 10
}
layers {
name: "pool_3d_1"
type: "pool3d"
size: 3360
active_type: ""
inputs {
input_layer_name: "data_3d_1"
pool_conf {
pool_type: "avg-projection"
channels: 30
size_x: 5
stride: 3
output_x: 4
img_size: 10
padding: 1
size_y: 5
stride_y: 3
output_y: 7
img_size_y: 20
padding_y: 1
size_z: 5
stride_z: 3
output_z: 4
img_size_z: 10
padding_z: 1
}
}
height: 7
width: 4
depth: 4
}
layers {
name: "pool_3d_2"
type: "pool3d"
size: 3360
active_type: ""
inputs {
input_layer_name: "data_3d_1"
pool_conf {
pool_type: "max-projection"
channels: 30
size_x: 5
stride: 3
output_x: 4
img_size: 10
padding: 1
size_y: 5
stride_y: 3
output_y: 7
img_size_y: 20
padding_y: 1
size_z: 5
stride_z: 3
output_z: 4
img_size_z: 10
padding_z: 1
}
}
height: 7
width: 4
depth: 4
}
input_layer_names: "data_2d"
output_layer_names: "pool___2d"
output_layer_names: "pool_3d_1"
output_layer_names: "pool_3d_2"
sub_models {
name: "root"
layer_names: "data_2d"
layer_names: "pool___2d"
layer_names: "data_3d_1"
layer_names: "pool_3d_1"
layer_names: "pool_3d_2"
input_layer_names: "data_2d"
output_layer_names: "pool___2d"
output_layer_names: "pool_3d_1"
output_layer_names: "pool_3d_2"
is_recurrent_layer_group: false
}
from paddle.trainer_config_helpers import *
# Trainer settings used by this config-parser test.
settings(batch_size=100, learning_rate=1e-5)

# Scalar window/stride/padding shared by the 2D layer and the first 3D layer.
scalar_geometry = dict(pool_size=5, stride=3, padding=1)

# 2D case: 30 channels * 20 (height) * 10 (width) = 6000 values per sample.
image_2d = data_layer(name='data_2d', width=10, height=20, size=6000)
avg_pool_2d = img_pool_layer(
    input=image_2d,
    name="pool___2d",
    num_channels=30,
    pool_type=AvgPooling(),
    **scalar_geometry)
outputs(avg_pool_2d)

# 3D case: 30 channels * 10 (depth) * 20 (height) * 10 (width) = 60000 values.
volume_3d = data_layer(
    name='data_3d_1', width=10, height=20, depth=10, size=60000)

# Average pooling with scalar arguments applied to every axis.
avg_pool_3d = img_pool3d_layer(
    input=volume_3d,
    name="pool_3d_1",
    num_channels=30,
    pool_type=AvgPooling(),
    **scalar_geometry)
outputs(avg_pool_3d)

# Max pooling with the same geometry spelled out as per-axis lists.
max_pool_3d = img_pool3d_layer(
    input=volume_3d,
    name="pool_3d_2",
    num_channels=30,
    pool_type=MaxPooling(),
    pool_size=[5, 5, 5],
    stride=[3, 3, 3],
    padding=[1, 1, 1])
outputs(max_pool_3d)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册