提交 b5a448f3 编写于 作者: T Tao Luo 提交者: GitHub

Merge pull request #4154 from luotao1/avg_pool

Refine avg-pooling so that it is exclusive by default (padded values are not counted in the averaging window size). Refine related code.
...@@ -22,10 +22,10 @@ limitations under the License. */ ...@@ -22,10 +22,10 @@ limitations under the License. */
*/ */
typedef enum { typedef enum {
HL_POOLING_MAX = 0, HL_POOLING_MAX = 0,
// average includes padded values
HL_POOLING_AVERAGE = 1,
// average does not include padded values // average does not include padded values
HL_POOLING_AVERAGE_EXCLUDE_PADDING = 2, HL_POOLING_AVERAGE = 1,
// average includes padded values
HL_POOLING_AVERAGE_INCLUDE_PADDING = 2,
HL_POOLING_END HL_POOLING_END
} hl_pooling_mode_t; } hl_pooling_mode_t;
......
...@@ -211,13 +211,11 @@ __global__ void KeAvgPoolForward(const int nthreads, ...@@ -211,13 +211,11 @@ __global__ void KeAvgPoolForward(const int nthreads,
int hstart = ph * strideH - padH; int hstart = ph * strideH - padH;
int wstart = pw * strideW - padW; int wstart = pw * strideW - padW;
int hend = min(hstart + sizeY, height + padH); int hend = min(hstart + sizeY, height);
int wend = min(wstart + sizeX, width + padW); int wend = min(wstart + sizeX, width);
int pool_size = (hend - hstart) * (wend - wstart);
hstart = max(hstart, 0); hstart = max(hstart, 0);
wstart = max(wstart, 0); wstart = max(wstart, 0);
hend = min(hend, height); int pool_size = (hend - hstart) * (wend - wstart);
wend = min(wend, width);
real aveval = 0; real aveval = 0;
inputData += (frameNum * channels + c) * height * width; inputData += (frameNum * channels + c) * height * width;
...@@ -299,12 +297,14 @@ __global__ void KeAvgPoolBackward(const int nthreads, ...@@ -299,12 +297,14 @@ __global__ void KeAvgPoolBackward(const int nthreads,
outGrad += (frameNum * outStride + offsetC * pooledH * pooledW); outGrad += (frameNum * outStride + offsetC * pooledH * pooledW);
for (int ph = phstart; ph < phend; ++ph) { for (int ph = phstart; ph < phend; ++ph) {
int hstart = ph * strideH - padH;
int hend = min(hstart + sizeY, height);
hstart = max(hstart, 0);
for (int pw = pwstart; pw < pwend; ++pw) { for (int pw = pwstart; pw < pwend; ++pw) {
// figure out the pooling size // figure out the pooling size
int hstart = ph * strideH - padH;
int wstart = pw * strideW - padW; int wstart = pw * strideW - padW;
int hend = min(hstart + sizeY, height + padH); int wend = min(wstart + sizeX, width);
int wend = min(wstart + sizeX, width + padW); wstart = max(wstart, 0);
int poolsize = (hend - hstart) * (wend - wstart); int poolsize = (hend - hstart) * (wend - wstart);
gradient += outGrad[ph * pooledW + pw] / poolsize; gradient += outGrad[ph * pooledW + pw] / poolsize;
} }
...@@ -600,16 +600,13 @@ __global__ void KeAvgPool3DForward(const int nthreads, ...@@ -600,16 +600,13 @@ __global__ void KeAvgPool3DForward(const int nthreads,
int dstart = pd * strideD - padD; int dstart = pd * strideD - padD;
int hstart = ph * strideH - padH; int hstart = ph * strideH - padH;
int wstart = pw * strideW - padW; int wstart = pw * strideW - padW;
int dend = min(dstart + sizeZ, depth + padD); int dend = min(dstart + sizeZ, depth);
int hend = min(hstart + sizeY, height + padH); int hend = min(hstart + sizeY, height);
int wend = min(wstart + sizeX, width + padW); int wend = min(wstart + sizeX, width);
int pool_size = (dend - dstart) * (hend - hstart) * (wend - wstart);
dstart = max(dstart, 0); dstart = max(dstart, 0);
hstart = max(hstart, 0); hstart = max(hstart, 0);
wstart = max(wstart, 0); wstart = max(wstart, 0);
dend = min(dend, depth); int pool_size = (dend - dstart) * (hend - hstart) * (wend - wstart);
hend = min(hend, height);
wend = min(wend, width);
real aveval = 0; real aveval = 0;
inputData += (frameNum * channels + c) * depth * height * width; inputData += (frameNum * channels + c) * depth * height * width;
...@@ -712,15 +709,18 @@ __global__ void KeAvgPool3DBackward(const int nthreads, ...@@ -712,15 +709,18 @@ __global__ void KeAvgPool3DBackward(const int nthreads,
outGrad += (frameNum * channels + offsetC) * pooledD * pooledH * pooledW; outGrad += (frameNum * channels + offsetC) * pooledD * pooledH * pooledW;
for (int pd = pdstart; pd < pdend; ++pd) { for (int pd = pdstart; pd < pdend; ++pd) {
int dstart = pd * strideD - padD;
int dend = min(dstart + sizeZ, depth);
dstart = max(dstart, 0);
for (int ph = phstart; ph < phend; ++ph) { for (int ph = phstart; ph < phend; ++ph) {
int hstart = ph * strideH - padH;
int hend = min(hstart + sizeY, height);
hstart = max(hstart, 0);
for (int pw = pwstart; pw < pwend; ++pw) { for (int pw = pwstart; pw < pwend; ++pw) {
// figure out the pooling size // figure out the pooling size
int dstart = pd * strideD - padD;
int hstart = ph * strideH - padH;
int wstart = pw * strideW - padW; int wstart = pw * strideW - padW;
int dend = min(dstart + sizeZ, depth + padD); int wend = min(wstart + sizeX, width);
int hend = min(hstart + sizeY, height + padH); wstart = max(wstart, 0);
int wend = min(wstart + sizeX, width + padW);
int poolsize = (dend - dstart) * (hend - hstart) * (wend - wstart); int poolsize = (dend - dstart) * (hend - hstart) * (wend - wstart);
gradient += outGrad[(pd * pooledH + ph) * pooledW + pw] / poolsize; gradient += outGrad[(pd * pooledH + ph) * pooledW + pw] / poolsize;
} }
......
...@@ -432,11 +432,11 @@ void hl_create_pooling_descriptor(hl_pooling_descriptor* pooling_desc, ...@@ -432,11 +432,11 @@ void hl_create_pooling_descriptor(hl_pooling_descriptor* pooling_desc,
cudnn_mode = CUDNN_POOLING_MAX; cudnn_mode = CUDNN_POOLING_MAX;
break; break;
case HL_POOLING_AVERAGE: case HL_POOLING_AVERAGE:
cudnn_mode = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
break;
case HL_POOLING_AVERAGE_EXCLUDE_PADDING:
cudnn_mode = CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING; cudnn_mode = CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING;
break; break;
case HL_POOLING_AVERAGE_INCLUDE_PADDING:
cudnn_mode = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
break;
default: default:
LOG(FATAL) << "parameter mode error"; LOG(FATAL) << "parameter mode error";
} }
......
...@@ -29,9 +29,9 @@ bool CudnnPoolLayer::typeCheck(const std::string &poolType, ...@@ -29,9 +29,9 @@ bool CudnnPoolLayer::typeCheck(const std::string &poolType,
if (mode) { if (mode) {
*mode = HL_POOLING_AVERAGE; *mode = HL_POOLING_AVERAGE;
} }
} else if (poolType == "cudnn-avg-excl-pad-pool") { } else if (poolType == "cudnn-avg-incl-pad-pool") {
if (mode) { if (mode) {
*mode = HL_POOLING_AVERAGE_EXCLUDE_PADDING; *mode = HL_POOLING_AVERAGE_INCLUDE_PADDING;
} }
} else { } else {
return false; return false;
......
...@@ -1033,17 +1033,15 @@ void GpuMatrix::maxPoolForward(Matrix& inputMat, ...@@ -1033,17 +1033,15 @@ void GpuMatrix::maxPoolForward(Matrix& inputMat,
real* inputData = inputMat.getData(); real* inputData = inputMat.getData();
size_t frameNum = inputMat.getHeight(); size_t frameNum = inputMat.getHeight();
size_t width = imgSizeW; CHECK(imgSizeH * imgSizeW * channels == inputMat.getWidth());
size_t height = imgSizeH;
CHECK(height * width * channels == inputMat.getWidth());
CHECK(height_ == inputMat.getHeight()); CHECK(height_ == inputMat.getHeight());
CHECK(width_ == outputH * outputW * channels); CHECK(width_ == outputH * outputW * channels);
hl_maxpool_forward(frameNum, hl_maxpool_forward(frameNum,
inputData, inputData,
channels, channels,
height, imgSizeH,
width, imgSizeW,
outputH, outputH,
outputW, outputW,
sizeX, sizeX,
...@@ -1080,11 +1078,8 @@ void GpuMatrix::maxPoolBackward(Matrix& inputMat, ...@@ -1080,11 +1078,8 @@ void GpuMatrix::maxPoolBackward(Matrix& inputMat,
real* outDiff = outGrad.getData(); real* outDiff = outGrad.getData();
size_t frameNum = inputMat.getHeight(); size_t frameNum = inputMat.getHeight();
size_t channels = outV.getWidth() / outputH / outputW; size_t channels = outV.getWidth() / outputH / outputW;
size_t width = imgSizeW; CHECK(imgSizeH * imgSizeW * channels == inputMat.getWidth());
size_t height = imgSizeH;
CHECK(height * width * channels == inputMat.getWidth());
CHECK(height_ == inputMat.getHeight()); CHECK(height_ == inputMat.getHeight());
CHECK(width_ == width * height * channels);
CHECK(outGrad.getHeight() == outV.getHeight() && CHECK(outGrad.getHeight() == outV.getHeight() &&
outGrad.getWidth() == outV.getWidth()); outGrad.getWidth() == outV.getWidth());
...@@ -1093,8 +1088,8 @@ void GpuMatrix::maxPoolBackward(Matrix& inputMat, ...@@ -1093,8 +1088,8 @@ void GpuMatrix::maxPoolBackward(Matrix& inputMat,
outData, outData,
outDiff, outDiff,
channels, channels,
height, imgSizeH,
width, imgSizeW,
outputH, outputH,
outputW, outputW,
sizeX, sizeX,
...@@ -1125,17 +1120,15 @@ void GpuMatrix::avgPoolForward(Matrix& inputMat, ...@@ -1125,17 +1120,15 @@ void GpuMatrix::avgPoolForward(Matrix& inputMat,
real* inputData = inputMat.getData(); real* inputData = inputMat.getData();
size_t frameNum = inputMat.getHeight(); size_t frameNum = inputMat.getHeight();
size_t height = imgSizeH; CHECK(imgSizeH * imgSizeW * channels == inputMat.getWidth());
size_t width = imgSizeW;
CHECK(height * width * channels == inputMat.getWidth());
CHECK(height_ == inputMat.getHeight()); CHECK(height_ == inputMat.getHeight());
CHECK(width_ == outputH * outputW * channels); CHECK(width_ == outputH * outputW * channels);
hl_avgpool_forward(frameNum, hl_avgpool_forward(frameNum,
inputData, inputData,
channels, channels,
height, imgSizeH,
width, imgSizeW,
outputH, outputH,
outputW, outputW,
sizeX, sizeX,
...@@ -1166,17 +1159,15 @@ void GpuMatrix::avgPoolBackward(Matrix& outGrad, ...@@ -1166,17 +1159,15 @@ void GpuMatrix::avgPoolBackward(Matrix& outGrad,
real* outDiff = outGrad.getData(); real* outDiff = outGrad.getData();
size_t frameNum = outGrad.getHeight(); size_t frameNum = outGrad.getHeight();
size_t channels = outGrad.getWidth() / outputH / outputW; size_t channels = outGrad.getWidth() / outputH / outputW;
size_t height = imgSizeH; CHECK(imgSizeH * imgSizeW * channels == width_);
size_t width = imgSizeW;
CHECK(height * width * channels == width_);
CHECK(height_ == outGrad.getHeight()); CHECK(height_ == outGrad.getHeight());
CHECK(outGrad.getWidth() == outputH * outputW * channels); CHECK(outGrad.getWidth() == outputH * outputW * channels);
hl_avgpool_backward(frameNum, hl_avgpool_backward(frameNum,
outDiff, outDiff,
channels, channels,
height, imgSizeH,
width, imgSizeW,
outputH, outputH,
outputW, outputW,
sizeX, sizeX,
...@@ -1214,19 +1205,16 @@ void GpuMatrix::maxPool3DForward(Matrix& inputMat, ...@@ -1214,19 +1205,16 @@ void GpuMatrix::maxPool3DForward(Matrix& inputMat,
real* inputData = inputMat.getData(); real* inputData = inputMat.getData();
real* maxPoolIdxData = maxPoolIdx.getData(); real* maxPoolIdxData = maxPoolIdx.getData();
size_t num = inputMat.getHeight(); size_t num = inputMat.getHeight();
size_t width = imgSizeW; CHECK(imgSizeD * imgSizeH * imgSizeW * channels == inputMat.getWidth());
size_t height = imgSizeH;
size_t depth = imgSizeD;
CHECK(depth * height * width * channels == inputMat.getWidth());
CHECK(height_ == inputMat.getHeight()); CHECK(height_ == inputMat.getHeight());
CHECK(width_ == outputD * outputH * outputW * channels); CHECK(width_ == outputD * outputH * outputW * channels);
hl_maxpool3D_forward(num, hl_maxpool3D_forward(num,
inputData, inputData,
channels, channels,
depth, imgSizeD,
height, imgSizeH,
width, imgSizeW,
outputD, outputD,
outputH, outputH,
outputW, outputW,
...@@ -1269,20 +1257,16 @@ void GpuMatrix::maxPool3DBackward(Matrix& outGrad, ...@@ -1269,20 +1257,16 @@ void GpuMatrix::maxPool3DBackward(Matrix& outGrad,
real* maxPoolIdxData = maxPoolIdx.getData(); real* maxPoolIdxData = maxPoolIdx.getData();
size_t frameNum = getHeight(); size_t frameNum = getHeight();
size_t channels = outGrad.getWidth() / outputD / outputH / outputW; size_t channels = outGrad.getWidth() / outputD / outputH / outputW;
size_t width = imgSizeW; CHECK(imgSizeD * imgSizeH * imgSizeW * channels == getWidth());
size_t height = imgSizeH;
size_t depth = imgSizeD;
CHECK(depth * height * width * channels == getWidth());
CHECK(width_ == depth * width * height * channels);
CHECK(outGrad.getHeight() == maxPoolIdx.getHeight() && CHECK(outGrad.getHeight() == maxPoolIdx.getHeight() &&
outGrad.getWidth() == maxPoolIdx.getWidth()); outGrad.getWidth() == maxPoolIdx.getWidth());
hl_maxpool3D_backward(frameNum, hl_maxpool3D_backward(frameNum,
outDiff, outDiff,
channels, channels,
depth, imgSizeD,
height, imgSizeH,
width, imgSizeW,
outputD, outputD,
outputH, outputH,
outputW, outputW,
...@@ -1323,19 +1307,16 @@ void GpuMatrix::avgPool3DForward(Matrix& inputMat, ...@@ -1323,19 +1307,16 @@ void GpuMatrix::avgPool3DForward(Matrix& inputMat,
real* inputData = inputMat.getData(); real* inputData = inputMat.getData();
size_t frameNum = inputMat.getHeight(); size_t frameNum = inputMat.getHeight();
size_t height = imgSizeH; CHECK(imgSizeD * imgSizeH * imgSizeW * channels == inputMat.getWidth());
size_t width = imgSizeW;
size_t depth = imgSizeD;
CHECK(depth * height * width * channels == inputMat.getWidth());
CHECK(height_ == inputMat.getHeight()); CHECK(height_ == inputMat.getHeight());
CHECK(width_ == outputD * outputH * outputW * channels); CHECK(width_ == outputD * outputH * outputW * channels);
hl_avgpool3D_forward(frameNum, hl_avgpool3D_forward(frameNum,
inputData, inputData,
channels, channels,
depth, imgSizeD,
height, imgSizeH,
width, imgSizeW,
outputD, outputD,
outputH, outputH,
outputW, outputW,
...@@ -1375,19 +1356,16 @@ void GpuMatrix::avgPool3DBackward(Matrix& outGrad, ...@@ -1375,19 +1356,16 @@ void GpuMatrix::avgPool3DBackward(Matrix& outGrad,
real* outDiff = outGrad.getData(); real* outDiff = outGrad.getData();
size_t frameNum = outGrad.getHeight(); size_t frameNum = outGrad.getHeight();
size_t channels = outGrad.getWidth() / outputD / outputH / outputW; size_t channels = outGrad.getWidth() / outputD / outputH / outputW;
size_t height = imgSizeH; CHECK(imgSizeD * imgSizeH * imgSizeW * channels == width_);
size_t width = imgSizeW;
size_t depth = imgSizeD;
CHECK(depth * height * width * channels == width_);
CHECK(height_ == outGrad.getHeight()); CHECK(height_ == outGrad.getHeight());
CHECK(outGrad.getWidth() == outputD * outputH * outputW * channels); CHECK(outGrad.getWidth() == outputD * outputH * outputW * channels);
hl_avgpool3D_backward(frameNum, hl_avgpool3D_backward(frameNum,
outDiff, outDiff,
channels, channels,
depth, imgSizeD,
height, imgSizeH,
width, imgSizeW,
outputD, outputD,
outputH, outputH,
outputW, outputW,
...@@ -1999,11 +1977,11 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, ...@@ -1999,11 +1977,11 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat,
real* inputData = inputMat.getData(); real* inputData = inputMat.getData();
real* outData = data_; real* outData = data_;
size_t num = inputMat.getHeight(); size_t num = inputMat.getHeight();
size_t inWidth = imgSizeW; size_t inLength = imgSizeH * imgSizeW;
size_t inHeight = imgSizeH; size_t outLength = outputH * outputW;
CHECK(inHeight * inWidth == inputMat.getWidth() / channels); CHECK(inLength == inputMat.getWidth() / channels);
CHECK_EQ(num, this->getHeight()); CHECK_EQ(num, this->getHeight());
CHECK_EQ(channels * outputH * outputW, this->getWidth()); CHECK_EQ(channels * outLength, this->getWidth());
size_t outStride = getStride(); size_t outStride = getStride();
/* initialize the data_ */ /* initialize the data_ */
...@@ -2020,24 +1998,24 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, ...@@ -2020,24 +1998,24 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat,
} }
for (size_t c = 0; c < channels; ++c) { // channel by channel for (size_t c = 0; c < channels; ++c) { // channel by channel
for (size_t ph = 0; ph < outputH; ++ph) { for (size_t ph = 0; ph < outputH; ++ph) {
for (size_t pw = 0; pw < outputW; ++pw) {
int hstart = ph * strideH - paddingH; int hstart = ph * strideH - paddingH;
int wstart = pw * strideW - paddingW; int hend = std::min(hstart + sizeY, imgSizeH);
int hend = std::min(hstart + sizeY, inHeight);
int wend = std::min(wstart + sizeX, inWidth);
hstart = std::max(hstart, 0); hstart = std::max(hstart, 0);
for (size_t pw = 0; pw < outputW; ++pw) {
int wstart = pw * strideW - paddingW;
int wend = std::min(wstart + sizeX, imgSizeW);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) { for (int w = wstart; w < wend; ++w) {
outData[ph * outputW + pw] = std::max(outData[ph * outputW + pw], outData[ph * outputW + pw] = std::max(
inputData[h * inWidth + w]); outData[ph * outputW + pw], inputData[h * imgSizeW + w]);
} }
} }
} }
} }
// compute offset // compute offset
inputData += inHeight * inWidth; inputData += inLength;
outData += outputH * outputW; outData += outLength;
} }
} }
} }
...@@ -2058,8 +2036,10 @@ void CpuMatrix::maxPoolBackward(Matrix& image, ...@@ -2058,8 +2036,10 @@ void CpuMatrix::maxPoolBackward(Matrix& image,
size_t paddingH, size_t paddingH,
size_t paddingW) { size_t paddingW) {
size_t num = image.getHeight(); size_t num = image.getHeight();
size_t channels = size_t(width_ / imgSizeH / imgSizeW); size_t inLength = imgSizeH * imgSizeW;
CHECK(image.getWidth() == imgSizeH * imgSizeW * channels); size_t outLength = outputH * outputW;
size_t channels = size_t(width_ / inLength);
CHECK(image.getWidth() == inLength * channels);
CHECK(image.getHeight() == height_ && image.getWidth() == width_); CHECK(image.getHeight() == height_ && image.getWidth() == width_);
CHECK(outV.getHeight() == outGrad.getHeight() && CHECK(outV.getHeight() == outGrad.getHeight() &&
outV.getWidth() == outGrad.getWidth()); outV.getWidth() == outGrad.getWidth());
...@@ -2080,12 +2060,12 @@ void CpuMatrix::maxPoolBackward(Matrix& image, ...@@ -2080,12 +2060,12 @@ void CpuMatrix::maxPoolBackward(Matrix& image,
} }
for (size_t c = 0; c < channels; ++c) { for (size_t c = 0; c < channels; ++c) {
for (size_t ph = 0; ph < outputH; ++ph) { for (size_t ph = 0; ph < outputH; ++ph) {
for (size_t pw = 0; pw < outputW; ++pw) {
int hstart = ph * strideH - paddingH; int hstart = ph * strideH - paddingH;
int wstart = pw * strideW - paddingW;
int hend = std::min(hstart + sizeY, imgSizeH); int hend = std::min(hstart + sizeY, imgSizeH);
int wend = std::min(wstart + sizeX, imgSizeW);
hstart = std::max(hstart, 0); hstart = std::max(hstart, 0);
for (size_t pw = 0; pw < outputW; ++pw) {
int wstart = pw * strideW - paddingW;
int wend = std::min(wstart + sizeX, imgSizeW);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) { for (int w = wstart; w < wend; ++w) {
...@@ -2098,10 +2078,10 @@ void CpuMatrix::maxPoolBackward(Matrix& image, ...@@ -2098,10 +2078,10 @@ void CpuMatrix::maxPoolBackward(Matrix& image,
} }
} }
// offset // offset
inData += imgSizeH * imgSizeW; inData += inLength;
tgtGrad += imgSizeH * imgSizeW; tgtGrad += inLength;
otData += outputH * outputW; otData += outLength;
otGrad += outputH * outputW; otGrad += outLength;
} }
} }
} }
...@@ -2120,10 +2100,10 @@ void CpuMatrix::avgPoolForward(Matrix& input, ...@@ -2120,10 +2100,10 @@ void CpuMatrix::avgPoolForward(Matrix& input,
size_t paddingW) { size_t paddingW) {
// The main loop // The main loop
size_t num = input.getHeight(); size_t num = input.getHeight();
size_t inHeight = imgSizeH; size_t inLength = imgSizeH * imgSizeW;
size_t inWidth = imgSizeW; size_t outLength = outputH * outputW;
CHECK(inHeight * inWidth * channels == input.getWidth()); CHECK(inLength * channels == input.getWidth());
CHECK(outputH * outputW * channels * num == height_ * width_); CHECK(outLength * channels * num == height_ * width_);
real* tgtData = data_; real* tgtData = data_;
real* inData = input.getData(); real* inData = input.getData();
...@@ -2133,30 +2113,27 @@ void CpuMatrix::avgPoolForward(Matrix& input, ...@@ -2133,30 +2113,27 @@ void CpuMatrix::avgPoolForward(Matrix& input,
} }
for (size_t c = 0; c < channels; ++c) { for (size_t c = 0; c < channels; ++c) {
for (size_t ph = 0; ph < outputH; ++ph) { for (size_t ph = 0; ph < outputH; ++ph) {
for (size_t pw = 0; pw < outputW; ++pw) {
int hstart = ph * strideH - paddingH; int hstart = ph * strideH - paddingH;
int wstart = pw * strideW - paddingW; int hend = std::min(hstart + sizeY, imgSizeH);
int hend = std::min(hstart + sizeY, inHeight + paddingH);
int wend = std::min(wstart + sizeX, inWidth + paddingW);
int poolSize = (hend - hstart) * (wend - wstart);
hstart = std::max(hstart, 0); hstart = std::max(hstart, 0);
for (size_t pw = 0; pw < outputW; ++pw) {
int wstart = pw * strideW - paddingW;
int wend = std::min(wstart + sizeX, imgSizeW);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
hend = std::min(hend, static_cast<int>(inHeight));
wend = std::min(wend, static_cast<int>(inWidth));
CHECK(poolSize);
tgtData[ph * outputW + pw] = 0; // clear tgtData[ph * outputW + pw] = 0; // clear
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) { for (int w = wstart; w < wend; ++w) {
tgtData[ph * outputW + pw] += inData[h * inWidth + w]; tgtData[ph * outputW + pw] += inData[h * imgSizeW + w];
} }
} }
int poolSize = (hend - hstart) * (wend - wstart);
CHECK(poolSize);
tgtData[ph * outputW + pw] /= poolSize; tgtData[ph * outputW + pw] /= poolSize;
} }
} }
// compute offset // compute offset
inData += inHeight * inWidth; inData += inLength;
tgtData += outputH * outputW; tgtData += outLength;
} }
} }
} }
...@@ -2176,7 +2153,9 @@ void CpuMatrix::avgPoolBackward(Matrix& input, ...@@ -2176,7 +2153,9 @@ void CpuMatrix::avgPoolBackward(Matrix& input,
size_t paddingW) { size_t paddingW) {
size_t num = input.getHeight(); size_t num = input.getHeight();
size_t channels = input.getWidth() / outputH / outputW; size_t channels = input.getWidth() / outputH / outputW;
CHECK(imgSizeH * imgSizeW * channels == getWidth()); size_t inLength = imgSizeH * imgSizeW;
size_t outLength = outputH * outputW;
CHECK(inLength * channels == getWidth());
real* inData = input.getData(); real* inData = input.getData();
real* outData = getData(); real* outData = getData();
...@@ -2186,16 +2165,14 @@ void CpuMatrix::avgPoolBackward(Matrix& input, ...@@ -2186,16 +2165,14 @@ void CpuMatrix::avgPoolBackward(Matrix& input,
} }
for (size_t c = 0; c < channels; ++c) { for (size_t c = 0; c < channels; ++c) {
for (size_t ph = 0; ph < outputH; ++ph) { for (size_t ph = 0; ph < outputH; ++ph) {
for (size_t pw = 0; pw < outputW; ++pw) {
int hstart = ph * strideH - paddingH; int hstart = ph * strideH - paddingH;
int wstart = pw * strideW - paddingW; int hend = std::min(hstart + sizeY, imgSizeH);
int hend = std::min(hstart + sizeY, imgSizeH + paddingH);
int wend = std::min(wstart + sizeX, imgSizeW + paddingW);
int poolSize = (hend - hstart) * (wend - wstart);
hstart = std::max(hstart, 0); hstart = std::max(hstart, 0);
for (size_t pw = 0; pw < outputW; ++pw) {
int wstart = pw * strideW - paddingW;
int wend = std::min(wstart + sizeX, imgSizeW);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
hend = std::min(hend, static_cast<int>(imgSizeH)); int poolSize = (hend - hstart) * (wend - wstart);
wend = std::min(wend, static_cast<int>(imgSizeW));
CHECK(poolSize); CHECK(poolSize);
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
...@@ -2206,8 +2183,8 @@ void CpuMatrix::avgPoolBackward(Matrix& input, ...@@ -2206,8 +2183,8 @@ void CpuMatrix::avgPoolBackward(Matrix& input,
} }
} }
// offset // offset
outData += imgSizeH * imgSizeW; outData += inLength;
inData += outputH * outputW; inData += outLength;
} }
} }
} }
...@@ -2234,12 +2211,11 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, ...@@ -2234,12 +2211,11 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat,
real* outData = getData(); real* outData = getData();
real* maxPoolIdxData = maxPoolIdx.getData(); real* maxPoolIdxData = maxPoolIdx.getData();
size_t num = inputMat.getHeight(); size_t num = inputMat.getHeight();
size_t inWidth = imgSizeW; size_t inLength = imgSizeH * imgSizeW * imgSizeD;
size_t inHeight = imgSizeH; size_t outLength = outputH * outputW * outputD;
size_t inDepth = imgSizeD; CHECK(inLength == inputMat.getWidth() / channels);
CHECK(inHeight * inWidth * inDepth == inputMat.getWidth() / channels);
CHECK_EQ(num, this->getHeight()); CHECK_EQ(num, this->getHeight());
CHECK_EQ(channels * outputH * outputW * outputD, this->getWidth()); CHECK_EQ(channels * outLength, this->getWidth());
size_t outStride = getStride(); size_t outStride = getStride();
/* initialize the data_ */ /* initialize the data_ */
...@@ -2258,16 +2234,16 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, ...@@ -2258,16 +2234,16 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat,
} }
for (size_t c = 0; c < channels; ++c) { // channel by channel for (size_t c = 0; c < channels; ++c) { // channel by channel
for (size_t pd = 0; pd < outputD; ++pd) { for (size_t pd = 0; pd < outputD; ++pd) {
for (size_t ph = 0; ph < outputH; ++ph) {
for (size_t pw = 0; pw < outputW; ++pw) {
int dstart = pd * strideD - paddingD; int dstart = pd * strideD - paddingD;
int hstart = ph * strideH - paddingH; int dend = std::min(dstart + sizeZ, imgSizeD);
int wstart = pw * strideW - paddingW;
int dend = std::min(dstart + sizeZ, inDepth);
int hend = std::min(hstart + sizeY, inHeight);
int wend = std::min(wstart + sizeX, inWidth);
dstart = std::max(dstart, 0); dstart = std::max(dstart, 0);
for (size_t ph = 0; ph < outputH; ++ph) {
int hstart = ph * strideH - paddingH;
int hend = std::min(hstart + sizeY, imgSizeH);
hstart = std::max(hstart, 0); hstart = std::max(hstart, 0);
for (size_t pw = 0; pw < outputW; ++pw) {
int wstart = pw * strideW - paddingW;
int wend = std::min(wstart + sizeX, imgSizeW);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
int maxIdx = -1; int maxIdx = -1;
real maxOutData = outData[(pd * outputH + ph) * outputW + pw]; real maxOutData = outData[(pd * outputH + ph) * outputW + pw];
...@@ -2275,9 +2251,9 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, ...@@ -2275,9 +2251,9 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat,
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) { for (int w = wstart; w < wend; ++w) {
if (maxOutData < if (maxOutData <
inputData[(d * inHeight + h) * inWidth + w]) { inputData[(d * imgSizeH + h) * imgSizeW + w]) {
maxOutData = inputData[(d * inHeight + h) * inWidth + w]; maxOutData = inputData[(d * imgSizeH + h) * imgSizeW + w];
maxIdx = (d * inHeight + h) * inWidth + w; maxIdx = (d * imgSizeH + h) * imgSizeW + w;
} }
} }
} }
...@@ -2288,9 +2264,9 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, ...@@ -2288,9 +2264,9 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat,
} }
} }
// compute offset // compute offset
inputData += inDepth * inHeight * inWidth; inputData += inLength;
outData += outputD * outputH * outputW; outData += outLength;
maxPoolIdxData += outputD * outputH * outputW; maxPoolIdxData += outLength;
} }
} }
} }
...@@ -2315,7 +2291,9 @@ void CpuMatrix::maxPool3DBackward(Matrix& outGrad, ...@@ -2315,7 +2291,9 @@ void CpuMatrix::maxPool3DBackward(Matrix& outGrad,
real scaleTargets, real scaleTargets,
real scaleOutput) { real scaleOutput) {
size_t num = getHeight(); size_t num = getHeight();
size_t channels = size_t(width_ / imgSizeD / imgSizeH / imgSizeW); size_t inLength = imgSizeH * imgSizeW * imgSizeD;
size_t outLength = outputH * outputW * outputD;
size_t channels = size_t(width_ / inLength);
CHECK(maxPoolIdx.getHeight() == outGrad.getHeight() && CHECK(maxPoolIdx.getHeight() == outGrad.getHeight() &&
maxPoolIdx.getWidth() == outGrad.getWidth()); maxPoolIdx.getWidth() == outGrad.getWidth());
...@@ -2341,9 +2319,9 @@ void CpuMatrix::maxPool3DBackward(Matrix& outGrad, ...@@ -2341,9 +2319,9 @@ void CpuMatrix::maxPool3DBackward(Matrix& outGrad,
} }
} }
// offset // offset
tgtGrad += imgSizeD * imgSizeH * imgSizeW; tgtGrad += inLength;
otGrad += outputD * outputH * outputW; otGrad += outLength;
maxPoolIdxData += outputD * outputH * outputW; maxPoolIdxData += outLength;
} }
} }
} }
...@@ -2367,11 +2345,10 @@ void CpuMatrix::avgPool3DForward(Matrix& input, ...@@ -2367,11 +2345,10 @@ void CpuMatrix::avgPool3DForward(Matrix& input,
size_t paddingW) { size_t paddingW) {
// The main loop // The main loop
size_t num = input.getHeight(); size_t num = input.getHeight();
size_t inDepth = imgSizeD; size_t inLength = imgSizeH * imgSizeW * imgSizeD;
size_t inHeight = imgSizeH; size_t outLength = outputH * outputW * outputD;
size_t inWidth = imgSizeW; CHECK(inLength * channels == input.getWidth());
CHECK(inDepth * inHeight * inWidth * channels == input.getWidth()); CHECK(outLength * channels * num == height_ * width_);
CHECK(outputD * outputH * outputW * channels * num == height_ * width_);
real* tgtData = getData(); real* tgtData = getData();
real* inData = input.getData(); real* inData = input.getData();
...@@ -2381,39 +2358,36 @@ void CpuMatrix::avgPool3DForward(Matrix& input, ...@@ -2381,39 +2358,36 @@ void CpuMatrix::avgPool3DForward(Matrix& input,
} }
for (size_t c = 0; c < channels; ++c) { for (size_t c = 0; c < channels; ++c) {
for (size_t pd = 0; pd < outputD; ++pd) { for (size_t pd = 0; pd < outputD; ++pd) {
for (size_t ph = 0; ph < outputH; ++ph) {
for (size_t pw = 0; pw < outputW; ++pw) {
int dstart = pd * strideD - paddingD; int dstart = pd * strideD - paddingD;
int hstart = ph * strideH - paddingH; int dend = std::min(dstart + sizeZ, imgSizeD);
int wstart = pw * strideW - paddingW;
int dend = std::min(dstart + sizeZ, inDepth + paddingD);
int hend = std::min(hstart + sizeY, inHeight + paddingH);
int wend = std::min(wstart + sizeX, inWidth + paddingW);
int poolSize = (dend - dstart) * (hend - hstart) * (wend - wstart);
dstart = std::max(dstart, 0); dstart = std::max(dstart, 0);
for (size_t ph = 0; ph < outputH; ++ph) {
int hstart = ph * strideH - paddingH;
int hend = std::min(hstart + sizeY, imgSizeH);
hstart = std::max(hstart, 0); hstart = std::max(hstart, 0);
for (size_t pw = 0; pw < outputW; ++pw) {
int wstart = pw * strideW - paddingW;
int wend = std::min(wstart + sizeX, imgSizeW);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
dend = std::min(dend, static_cast<int>(inDepth));
hend = std::min(hend, static_cast<int>(inHeight));
wend = std::min(wend, static_cast<int>(inWidth));
CHECK(poolSize);
tgtData[(pd * outputH + ph) * outputW + pw] = 0; // clear tgtData[(pd * outputH + ph) * outputW + pw] = 0; // clear
for (int d = dstart; d < dend; ++d) { for (int d = dstart; d < dend; ++d) {
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) { for (int w = wstart; w < wend; ++w) {
tgtData[(pd * outputH + ph) * outputW + pw] += tgtData[(pd * outputH + ph) * outputW + pw] +=
inData[(d * inHeight + h) * inWidth + w]; inData[(d * imgSizeH + h) * imgSizeW + w];
} }
} }
} }
int poolSize = (dend - dstart) * (hend - hstart) * (wend - wstart);
CHECK(poolSize);
tgtData[(pd * outputH + ph) * outputW + pw] /= poolSize; tgtData[(pd * outputH + ph) * outputW + pw] /= poolSize;
} }
} }
} }
// compute offset // compute offset
inData += inDepth * inHeight * inWidth; inData += inLength;
tgtData += outputD * outputH * outputW; tgtData += outLength;
} }
} }
} }
...@@ -2437,8 +2411,10 @@ void CpuMatrix::avgPool3DBackward(Matrix& input, ...@@ -2437,8 +2411,10 @@ void CpuMatrix::avgPool3DBackward(Matrix& input,
real scaleTargets, real scaleTargets,
real scaleOutput) { real scaleOutput) {
size_t num = input.getHeight(); size_t num = input.getHeight();
size_t channels = input.getWidth() / outputD / outputH / outputW; size_t inLength = imgSizeH * imgSizeW * imgSizeD;
CHECK(imgSizeD * imgSizeH * imgSizeW * channels == getWidth()); size_t outLength = outputH * outputW * outputD;
size_t channels = input.getWidth() / outLength;
CHECK(inLength * channels == getWidth());
real* inData = input.getData(); real* inData = input.getData();
real* outData = getData(); real* outData = getData();
...@@ -2448,21 +2424,18 @@ void CpuMatrix::avgPool3DBackward(Matrix& input, ...@@ -2448,21 +2424,18 @@ void CpuMatrix::avgPool3DBackward(Matrix& input,
} }
for (size_t c = 0; c < channels; ++c) { for (size_t c = 0; c < channels; ++c) {
for (size_t pd = 0; pd < outputD; ++pd) { for (size_t pd = 0; pd < outputD; ++pd) {
for (size_t ph = 0; ph < outputH; ++ph) {
for (size_t pw = 0; pw < outputW; ++pw) {
int dstart = pd * strideD - paddingD; int dstart = pd * strideD - paddingD;
int hstart = ph * strideH - paddingH; int dend = std::min(dstart + sizeZ, imgSizeD);
int wstart = pw * strideW - paddingW;
int dend = std::min(dstart + sizeZ, imgSizeD + paddingD);
int hend = std::min(hstart + sizeY, imgSizeH + paddingH);
int wend = std::min(wstart + sizeX, imgSizeW + paddingW);
int poolSize = (dend - dstart) * (hend - hstart) * (wend - wstart);
dstart = std::max(dstart, 0); dstart = std::max(dstart, 0);
for (size_t ph = 0; ph < outputH; ++ph) {
int hstart = ph * strideH - paddingH;
int hend = std::min(hstart + sizeY, imgSizeH);
hstart = std::max(hstart, 0); hstart = std::max(hstart, 0);
for (size_t pw = 0; pw < outputW; ++pw) {
int wstart = pw * strideW - paddingW;
int wend = std::min(wstart + sizeX, imgSizeW);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
dend = std::min(dend, static_cast<int>(imgSizeD)); int poolSize = (dend - dstart) * (hend - hstart) * (wend - wstart);
hend = std::min(hend, static_cast<int>(imgSizeH));
wend = std::min(wend, static_cast<int>(imgSizeW));
CHECK(poolSize); CHECK(poolSize);
for (int d = dstart; d < dend; ++d) { for (int d = dstart; d < dend; ++d) {
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
...@@ -2476,8 +2449,8 @@ void CpuMatrix::avgPool3DBackward(Matrix& input, ...@@ -2476,8 +2449,8 @@ void CpuMatrix::avgPool3DBackward(Matrix& input,
} }
} }
// offset // offset
outData += imgSizeD * imgSizeH * imgSizeW; outData += inLength;
inData += outputD * outputH * outputW; inData += outLength;
} }
} }
} }
......
...@@ -825,9 +825,8 @@ void testMaxPoolFwdBwd(int numSamples, ...@@ -825,9 +825,8 @@ void testMaxPoolFwdBwd(int numSamples,
int strideW, int strideW,
int padH, int padH,
int padW) { int padW) {
int outH = 0, outW = 0; int outH = outputSize(imgSizeH, ksizeH, padH, strideH, true);
outH = (imgSizeH - ksizeH + 2 * padH + strideH - 1) / strideH + 1; int outW = outputSize(imgSizeW, ksizeW, padW, strideW, true);
outW = (imgSizeW - ksizeW + 2 * padW + strideW - 1) / strideW + 1;
int inWidth = imgSizeH * imgSizeW * channels; int inWidth = imgSizeH * imgSizeW * channels;
MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false); MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false);
...@@ -927,9 +926,8 @@ void testAvgPoolFwdBwd(int numSamples, ...@@ -927,9 +926,8 @@ void testAvgPoolFwdBwd(int numSamples,
int strideW, int strideW,
int padH, int padH,
int padW) { int padW) {
int outH = 0, outW = 0; int outH = outputSize(imgSizeH, ksizeH, padH, strideH, true);
outH = (imgSizeH - ksizeH + 2 * padH + strideH - 1) / strideH + 1; int outW = outputSize(imgSizeW, ksizeW, padW, strideW, true);
outW = (imgSizeW - ksizeW + 2 * padW + strideW - 1) / strideW + 1;
int inWidth = imgSizeH * imgSizeW * channels; int inWidth = imgSizeH * imgSizeW * channels;
MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false); MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册