提交 8067a42b 编写于 作者: L Luo Tao

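Refine avg-pooling so that it is exclusive, i.e. the pooling window is clamped to the image bounds before its size is computed, so padded positions are not counted in the averaging divisor. Refine related code.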

上级 59c48f98
...@@ -211,13 +211,11 @@ __global__ void KeAvgPoolForward(const int nthreads, ...@@ -211,13 +211,11 @@ __global__ void KeAvgPoolForward(const int nthreads,
int hstart = ph * strideH - padH; int hstart = ph * strideH - padH;
int wstart = pw * strideW - padW; int wstart = pw * strideW - padW;
int hend = min(hstart + sizeY, height + padH); int hend = min(hstart + sizeY, height);
int wend = min(wstart + sizeX, width + padW); int wend = min(wstart + sizeX, width);
int pool_size = (hend - hstart) * (wend - wstart);
hstart = max(hstart, 0); hstart = max(hstart, 0);
wstart = max(wstart, 0); wstart = max(wstart, 0);
hend = min(hend, height); int pool_size = (hend - hstart) * (wend - wstart);
wend = min(wend, width);
real aveval = 0; real aveval = 0;
inputData += (frameNum * channels + c) * height * width; inputData += (frameNum * channels + c) * height * width;
...@@ -299,11 +297,11 @@ __global__ void KeAvgPoolBackward(const int nthreads, ...@@ -299,11 +297,11 @@ __global__ void KeAvgPoolBackward(const int nthreads,
outGrad += (frameNum * outStride + offsetC * pooledH * pooledW); outGrad += (frameNum * outStride + offsetC * pooledH * pooledW);
for (int ph = phstart; ph < phend; ++ph) { for (int ph = phstart; ph < phend; ++ph) {
int hstart = ph * strideH - padH;
int hend = min(hstart + sizeY, height + padH);
for (int pw = pwstart; pw < pwend; ++pw) { for (int pw = pwstart; pw < pwend; ++pw) {
// figure out the pooling size // figure out the pooling size
int hstart = ph * strideH - padH;
int wstart = pw * strideW - padW; int wstart = pw * strideW - padW;
int hend = min(hstart + sizeY, height + padH);
int wend = min(wstart + sizeX, width + padW); int wend = min(wstart + sizeX, width + padW);
int poolsize = (hend - hstart) * (wend - wstart); int poolsize = (hend - hstart) * (wend - wstart);
gradient += outGrad[ph * pooledW + pw] / poolsize; gradient += outGrad[ph * pooledW + pw] / poolsize;
...@@ -600,16 +598,13 @@ __global__ void KeAvgPool3DForward(const int nthreads, ...@@ -600,16 +598,13 @@ __global__ void KeAvgPool3DForward(const int nthreads,
int dstart = pd * strideD - padD; int dstart = pd * strideD - padD;
int hstart = ph * strideH - padH; int hstart = ph * strideH - padH;
int wstart = pw * strideW - padW; int wstart = pw * strideW - padW;
int dend = min(dstart + sizeZ, depth + padD); int dend = min(dstart + sizeZ, depth);
int hend = min(hstart + sizeY, height + padH); int hend = min(hstart + sizeY, height);
int wend = min(wstart + sizeX, width + padW); int wend = min(wstart + sizeX, width);
int pool_size = (dend - dstart) * (hend - hstart) * (wend - wstart);
dstart = max(dstart, 0); dstart = max(dstart, 0);
hstart = max(hstart, 0); hstart = max(hstart, 0);
wstart = max(wstart, 0); wstart = max(wstart, 0);
dend = min(dend, depth); int pool_size = (dend - dstart) * (hend - hstart) * (wend - wstart);
hend = min(hend, height);
wend = min(wend, width);
real aveval = 0; real aveval = 0;
inputData += (frameNum * channels + c) * depth * height * width; inputData += (frameNum * channels + c) * depth * height * width;
...@@ -712,14 +707,14 @@ __global__ void KeAvgPool3DBackward(const int nthreads, ...@@ -712,14 +707,14 @@ __global__ void KeAvgPool3DBackward(const int nthreads,
outGrad += (frameNum * channels + offsetC) * pooledD * pooledH * pooledW; outGrad += (frameNum * channels + offsetC) * pooledD * pooledH * pooledW;
for (int pd = pdstart; pd < pdend; ++pd) { for (int pd = pdstart; pd < pdend; ++pd) {
int dstart = pd * strideD - padD;
int dend = min(dstart + sizeZ, depth + padD);
for (int ph = phstart; ph < phend; ++ph) { for (int ph = phstart; ph < phend; ++ph) {
int hstart = ph * strideH - padH;
int hend = min(hstart + sizeY, height + padH);
for (int pw = pwstart; pw < pwend; ++pw) { for (int pw = pwstart; pw < pwend; ++pw) {
// figure out the pooling size // figure out the pooling size
int dstart = pd * strideD - padD;
int hstart = ph * strideH - padH;
int wstart = pw * strideW - padW; int wstart = pw * strideW - padW;
int dend = min(dstart + sizeZ, depth + padD);
int hend = min(hstart + sizeY, height + padH);
int wend = min(wstart + sizeX, width + padW); int wend = min(wstart + sizeX, width + padW);
int poolsize = (dend - dstart) * (hend - hstart) * (wend - wstart); int poolsize = (dend - dstart) * (hend - hstart) * (wend - wstart);
gradient += outGrad[(pd * pooledH + ph) * pooledW + pw] / poolsize; gradient += outGrad[(pd * pooledH + ph) * pooledW + pw] / poolsize;
......
...@@ -1033,17 +1033,15 @@ void GpuMatrix::maxPoolForward(Matrix& inputMat, ...@@ -1033,17 +1033,15 @@ void GpuMatrix::maxPoolForward(Matrix& inputMat,
real* inputData = inputMat.getData(); real* inputData = inputMat.getData();
size_t frameNum = inputMat.getHeight(); size_t frameNum = inputMat.getHeight();
size_t width = imgSizeW; CHECK(imgSizeH * imgSizeW * channels == inputMat.getWidth());
size_t height = imgSizeH;
CHECK(height * width * channels == inputMat.getWidth());
CHECK(height_ == inputMat.getHeight()); CHECK(height_ == inputMat.getHeight());
CHECK(width_ == outputH * outputW * channels); CHECK(width_ == outputH * outputW * channels);
hl_maxpool_forward(frameNum, hl_maxpool_forward(frameNum,
inputData, inputData,
channels, channels,
height, imgSizeH,
width, imgSizeW,
outputH, outputH,
outputW, outputW,
sizeX, sizeX,
...@@ -1080,11 +1078,9 @@ void GpuMatrix::maxPoolBackward(Matrix& inputMat, ...@@ -1080,11 +1078,9 @@ void GpuMatrix::maxPoolBackward(Matrix& inputMat,
real* outDiff = outGrad.getData(); real* outDiff = outGrad.getData();
size_t frameNum = inputMat.getHeight(); size_t frameNum = inputMat.getHeight();
size_t channels = outV.getWidth() / outputH / outputW; size_t channels = outV.getWidth() / outputH / outputW;
size_t width = imgSizeW; CHECK(imgSizeH * imgSizeW * channels == inputMat.getWidth());
size_t height = imgSizeH;
CHECK(height * width * channels == inputMat.getWidth());
CHECK(height_ == inputMat.getHeight()); CHECK(height_ == inputMat.getHeight());
CHECK(width_ == width * height * channels); CHECK(width_ == imgSizeW * imgSizeH * channels);
CHECK(outGrad.getHeight() == outV.getHeight() && CHECK(outGrad.getHeight() == outV.getHeight() &&
outGrad.getWidth() == outV.getWidth()); outGrad.getWidth() == outV.getWidth());
...@@ -1093,8 +1089,8 @@ void GpuMatrix::maxPoolBackward(Matrix& inputMat, ...@@ -1093,8 +1089,8 @@ void GpuMatrix::maxPoolBackward(Matrix& inputMat,
outData, outData,
outDiff, outDiff,
channels, channels,
height, imgSizeH,
width, imgSizeW,
outputH, outputH,
outputW, outputW,
sizeX, sizeX,
...@@ -1125,17 +1121,15 @@ void GpuMatrix::avgPoolForward(Matrix& inputMat, ...@@ -1125,17 +1121,15 @@ void GpuMatrix::avgPoolForward(Matrix& inputMat,
real* inputData = inputMat.getData(); real* inputData = inputMat.getData();
size_t frameNum = inputMat.getHeight(); size_t frameNum = inputMat.getHeight();
size_t height = imgSizeH; CHECK(imgSizeH * imgSizeW * channels == inputMat.getWidth());
size_t width = imgSizeW;
CHECK(height * width * channels == inputMat.getWidth());
CHECK(height_ == inputMat.getHeight()); CHECK(height_ == inputMat.getHeight());
CHECK(width_ == outputH * outputW * channels); CHECK(width_ == outputH * outputW * channels);
hl_avgpool_forward(frameNum, hl_avgpool_forward(frameNum,
inputData, inputData,
channels, channels,
height, imgSizeH,
width, imgSizeW,
outputH, outputH,
outputW, outputW,
sizeX, sizeX,
...@@ -1166,17 +1160,15 @@ void GpuMatrix::avgPoolBackward(Matrix& outGrad, ...@@ -1166,17 +1160,15 @@ void GpuMatrix::avgPoolBackward(Matrix& outGrad,
real* outDiff = outGrad.getData(); real* outDiff = outGrad.getData();
size_t frameNum = outGrad.getHeight(); size_t frameNum = outGrad.getHeight();
size_t channels = outGrad.getWidth() / outputH / outputW; size_t channels = outGrad.getWidth() / outputH / outputW;
size_t height = imgSizeH; CHECK(imgSizeH * imgSizeW * channels == width_);
size_t width = imgSizeW;
CHECK(height * width * channels == width_);
CHECK(height_ == outGrad.getHeight()); CHECK(height_ == outGrad.getHeight());
CHECK(outGrad.getWidth() == outputH * outputW * channels); CHECK(outGrad.getWidth() == outputH * outputW * channels);
hl_avgpool_backward(frameNum, hl_avgpool_backward(frameNum,
outDiff, outDiff,
channels, channels,
height, imgSizeH,
width, imgSizeW,
outputH, outputH,
outputW, outputW,
sizeX, sizeX,
...@@ -1214,19 +1206,16 @@ void GpuMatrix::maxPool3DForward(Matrix& inputMat, ...@@ -1214,19 +1206,16 @@ void GpuMatrix::maxPool3DForward(Matrix& inputMat,
real* inputData = inputMat.getData(); real* inputData = inputMat.getData();
real* maxPoolIdxData = maxPoolIdx.getData(); real* maxPoolIdxData = maxPoolIdx.getData();
size_t num = inputMat.getHeight(); size_t num = inputMat.getHeight();
size_t width = imgSizeW; CHECK(imgSizeD * imgSizeH * imgSizeW * channels == inputMat.getWidth());
size_t height = imgSizeH;
size_t depth = imgSizeD;
CHECK(depth * height * width * channels == inputMat.getWidth());
CHECK(height_ == inputMat.getHeight()); CHECK(height_ == inputMat.getHeight());
CHECK(width_ == outputD * outputH * outputW * channels); CHECK(width_ == outputD * outputH * outputW * channels);
hl_maxpool3D_forward(num, hl_maxpool3D_forward(num,
inputData, inputData,
channels, channels,
depth, imgSizeD,
height, imgSizeH,
width, imgSizeW,
outputD, outputD,
outputH, outputH,
outputW, outputW,
...@@ -1269,20 +1258,17 @@ void GpuMatrix::maxPool3DBackward(Matrix& outGrad, ...@@ -1269,20 +1258,17 @@ void GpuMatrix::maxPool3DBackward(Matrix& outGrad,
real* maxPoolIdxData = maxPoolIdx.getData(); real* maxPoolIdxData = maxPoolIdx.getData();
size_t frameNum = getHeight(); size_t frameNum = getHeight();
size_t channels = outGrad.getWidth() / outputD / outputH / outputW; size_t channels = outGrad.getWidth() / outputD / outputH / outputW;
size_t width = imgSizeW; CHECK(imgSizeD * imgSizeH * imgSizeW * channels == getWidth());
size_t height = imgSizeH; CHECK(width_ == imgSizeD * imgSizeH * imgSizeW * channels);
size_t depth = imgSizeD;
CHECK(depth * height * width * channels == getWidth());
CHECK(width_ == depth * width * height * channels);
CHECK(outGrad.getHeight() == maxPoolIdx.getHeight() && CHECK(outGrad.getHeight() == maxPoolIdx.getHeight() &&
outGrad.getWidth() == maxPoolIdx.getWidth()); outGrad.getWidth() == maxPoolIdx.getWidth());
hl_maxpool3D_backward(frameNum, hl_maxpool3D_backward(frameNum,
outDiff, outDiff,
channels, channels,
depth, imgSizeD,
height, imgSizeH,
width, imgSizeW,
outputD, outputD,
outputH, outputH,
outputW, outputW,
...@@ -1323,19 +1309,16 @@ void GpuMatrix::avgPool3DForward(Matrix& inputMat, ...@@ -1323,19 +1309,16 @@ void GpuMatrix::avgPool3DForward(Matrix& inputMat,
real* inputData = inputMat.getData(); real* inputData = inputMat.getData();
size_t frameNum = inputMat.getHeight(); size_t frameNum = inputMat.getHeight();
size_t height = imgSizeH; CHECK(imgSizeD * imgSizeH * imgSizeW * channels == inputMat.getWidth());
size_t width = imgSizeW;
size_t depth = imgSizeD;
CHECK(depth * height * width * channels == inputMat.getWidth());
CHECK(height_ == inputMat.getHeight()); CHECK(height_ == inputMat.getHeight());
CHECK(width_ == outputD * outputH * outputW * channels); CHECK(width_ == outputD * outputH * outputW * channels);
hl_avgpool3D_forward(frameNum, hl_avgpool3D_forward(frameNum,
inputData, inputData,
channels, channels,
depth, imgSizeD,
height, imgSizeH,
width, imgSizeW,
outputD, outputD,
outputH, outputH,
outputW, outputW,
...@@ -1375,19 +1358,16 @@ void GpuMatrix::avgPool3DBackward(Matrix& outGrad, ...@@ -1375,19 +1358,16 @@ void GpuMatrix::avgPool3DBackward(Matrix& outGrad,
real* outDiff = outGrad.getData(); real* outDiff = outGrad.getData();
size_t frameNum = outGrad.getHeight(); size_t frameNum = outGrad.getHeight();
size_t channels = outGrad.getWidth() / outputD / outputH / outputW; size_t channels = outGrad.getWidth() / outputD / outputH / outputW;
size_t height = imgSizeH; CHECK(imgSizeD * imgSizeH * imgSizeW * channels == width_);
size_t width = imgSizeW;
size_t depth = imgSizeD;
CHECK(depth * height * width * channels == width_);
CHECK(height_ == outGrad.getHeight()); CHECK(height_ == outGrad.getHeight());
CHECK(outGrad.getWidth() == outputD * outputH * outputW * channels); CHECK(outGrad.getWidth() == outputD * outputH * outputW * channels);
hl_avgpool3D_backward(frameNum, hl_avgpool3D_backward(frameNum,
outDiff, outDiff,
channels, channels,
depth, imgSizeD,
height, imgSizeH,
width, imgSizeW,
outputD, outputD,
outputH, outputH,
outputW, outputW,
...@@ -1999,11 +1979,11 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, ...@@ -1999,11 +1979,11 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat,
real* inputData = inputMat.getData(); real* inputData = inputMat.getData();
real* outData = data_; real* outData = data_;
size_t num = inputMat.getHeight(); size_t num = inputMat.getHeight();
size_t inWidth = imgSizeW; size_t inLength = imgSizeH * imgSizeW;
size_t inHeight = imgSizeH; size_t outLength = outputH * outputW;
CHECK(inHeight * inWidth == inputMat.getWidth() / channels); CHECK(inLength == inputMat.getWidth() / channels);
CHECK_EQ(num, this->getHeight()); CHECK_EQ(num, this->getHeight());
CHECK_EQ(channels * outputH * outputW, this->getWidth()); CHECK_EQ(channels * outLength, this->getWidth());
size_t outStride = getStride(); size_t outStride = getStride();
/* initialize the data_ */ /* initialize the data_ */
...@@ -2020,24 +2000,24 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, ...@@ -2020,24 +2000,24 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat,
} }
for (size_t c = 0; c < channels; ++c) { // channel by channel for (size_t c = 0; c < channels; ++c) { // channel by channel
for (size_t ph = 0; ph < outputH; ++ph) { for (size_t ph = 0; ph < outputH; ++ph) {
for (size_t pw = 0; pw < outputW; ++pw) {
int hstart = ph * strideH - paddingH; int hstart = ph * strideH - paddingH;
int wstart = pw * strideW - paddingW; int hend = std::min(hstart + sizeY, imgSizeH);
int hend = std::min(hstart + sizeY, inHeight);
int wend = std::min(wstart + sizeX, inWidth);
hstart = std::max(hstart, 0); hstart = std::max(hstart, 0);
for (size_t pw = 0; pw < outputW; ++pw) {
int wstart = pw * strideW - paddingW;
int wend = std::min(wstart + sizeX, imgSizeW);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) { for (int w = wstart; w < wend; ++w) {
outData[ph * outputW + pw] = std::max(outData[ph * outputW + pw], outData[ph * outputW + pw] = std::max(
inputData[h * inWidth + w]); outData[ph * outputW + pw], inputData[h * imgSizeW + w]);
} }
} }
} }
} }
// compute offset // compute offset
inputData += inHeight * inWidth; inputData += inLength;
outData += outputH * outputW; outData += outLength;
} }
} }
} }
...@@ -2058,8 +2038,10 @@ void CpuMatrix::maxPoolBackward(Matrix& image, ...@@ -2058,8 +2038,10 @@ void CpuMatrix::maxPoolBackward(Matrix& image,
size_t paddingH, size_t paddingH,
size_t paddingW) { size_t paddingW) {
size_t num = image.getHeight(); size_t num = image.getHeight();
size_t channels = size_t(width_ / imgSizeH / imgSizeW); size_t inLength = imgSizeH * imgSizeW;
CHECK(image.getWidth() == imgSizeH * imgSizeW * channels); size_t outLength = outputH * outputW;
size_t channels = size_t(width_ / inLength);
CHECK(image.getWidth() == inLength * channels);
CHECK(image.getHeight() == height_ && image.getWidth() == width_); CHECK(image.getHeight() == height_ && image.getWidth() == width_);
CHECK(outV.getHeight() == outGrad.getHeight() && CHECK(outV.getHeight() == outGrad.getHeight() &&
outV.getWidth() == outGrad.getWidth()); outV.getWidth() == outGrad.getWidth());
...@@ -2080,12 +2062,12 @@ void CpuMatrix::maxPoolBackward(Matrix& image, ...@@ -2080,12 +2062,12 @@ void CpuMatrix::maxPoolBackward(Matrix& image,
} }
for (size_t c = 0; c < channels; ++c) { for (size_t c = 0; c < channels; ++c) {
for (size_t ph = 0; ph < outputH; ++ph) { for (size_t ph = 0; ph < outputH; ++ph) {
for (size_t pw = 0; pw < outputW; ++pw) {
int hstart = ph * strideH - paddingH; int hstart = ph * strideH - paddingH;
int wstart = pw * strideW - paddingW;
int hend = std::min(hstart + sizeY, imgSizeH); int hend = std::min(hstart + sizeY, imgSizeH);
int wend = std::min(wstart + sizeX, imgSizeW);
hstart = std::max(hstart, 0); hstart = std::max(hstart, 0);
for (size_t pw = 0; pw < outputW; ++pw) {
int wstart = pw * strideW - paddingW;
int wend = std::min(wstart + sizeX, imgSizeW);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) { for (int w = wstart; w < wend; ++w) {
...@@ -2098,10 +2080,10 @@ void CpuMatrix::maxPoolBackward(Matrix& image, ...@@ -2098,10 +2080,10 @@ void CpuMatrix::maxPoolBackward(Matrix& image,
} }
} }
// offset // offset
inData += imgSizeH * imgSizeW; inData += inLength;
tgtGrad += imgSizeH * imgSizeW; tgtGrad += inLength;
otData += outputH * outputW; otData += outLength;
otGrad += outputH * outputW; otGrad += outLength;
} }
} }
} }
...@@ -2120,10 +2102,10 @@ void CpuMatrix::avgPoolForward(Matrix& input, ...@@ -2120,10 +2102,10 @@ void CpuMatrix::avgPoolForward(Matrix& input,
size_t paddingW) { size_t paddingW) {
// The main loop // The main loop
size_t num = input.getHeight(); size_t num = input.getHeight();
size_t inHeight = imgSizeH; size_t inLength = imgSizeH * imgSizeW;
size_t inWidth = imgSizeW; size_t outLength = outputH * outputW;
CHECK(inHeight * inWidth * channels == input.getWidth()); CHECK(inLength * channels == input.getWidth());
CHECK(outputH * outputW * channels * num == height_ * width_); CHECK(outLength * channels * num == height_ * width_);
real* tgtData = data_; real* tgtData = data_;
real* inData = input.getData(); real* inData = input.getData();
...@@ -2133,30 +2115,27 @@ void CpuMatrix::avgPoolForward(Matrix& input, ...@@ -2133,30 +2115,27 @@ void CpuMatrix::avgPoolForward(Matrix& input,
} }
for (size_t c = 0; c < channels; ++c) { for (size_t c = 0; c < channels; ++c) {
for (size_t ph = 0; ph < outputH; ++ph) { for (size_t ph = 0; ph < outputH; ++ph) {
for (size_t pw = 0; pw < outputW; ++pw) {
int hstart = ph * strideH - paddingH; int hstart = ph * strideH - paddingH;
int wstart = pw * strideW - paddingW; int hend = std::min(hstart + sizeY, imgSizeH);
int hend = std::min(hstart + sizeY, inHeight + paddingH);
int wend = std::min(wstart + sizeX, inWidth + paddingW);
int poolSize = (hend - hstart) * (wend - wstart);
hstart = std::max(hstart, 0); hstart = std::max(hstart, 0);
for (size_t pw = 0; pw < outputW; ++pw) {
int wstart = pw * strideW - paddingW;
int wend = std::min(wstart + sizeX, imgSizeW);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
hend = std::min(hend, static_cast<int>(inHeight));
wend = std::min(wend, static_cast<int>(inWidth));
CHECK(poolSize);
tgtData[ph * outputW + pw] = 0; // clear tgtData[ph * outputW + pw] = 0; // clear
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) { for (int w = wstart; w < wend; ++w) {
tgtData[ph * outputW + pw] += inData[h * inWidth + w]; tgtData[ph * outputW + pw] += inData[h * imgSizeW + w];
} }
} }
int poolSize = (hend - hstart) * (wend - wstart);
CHECK(poolSize);
tgtData[ph * outputW + pw] /= poolSize; tgtData[ph * outputW + pw] /= poolSize;
} }
} }
// compute offset // compute offset
inData += inHeight * inWidth; inData += inLength;
tgtData += outputH * outputW; tgtData += outLength;
} }
} }
} }
...@@ -2176,7 +2155,9 @@ void CpuMatrix::avgPoolBackward(Matrix& input, ...@@ -2176,7 +2155,9 @@ void CpuMatrix::avgPoolBackward(Matrix& input,
size_t paddingW) { size_t paddingW) {
size_t num = input.getHeight(); size_t num = input.getHeight();
size_t channels = input.getWidth() / outputH / outputW; size_t channels = input.getWidth() / outputH / outputW;
CHECK(imgSizeH * imgSizeW * channels == getWidth()); size_t inLength = imgSizeH * imgSizeW;
size_t outLength = outputH * outputW;
CHECK(inLength * channels == getWidth());
real* inData = input.getData(); real* inData = input.getData();
real* outData = getData(); real* outData = getData();
...@@ -2186,16 +2167,14 @@ void CpuMatrix::avgPoolBackward(Matrix& input, ...@@ -2186,16 +2167,14 @@ void CpuMatrix::avgPoolBackward(Matrix& input,
} }
for (size_t c = 0; c < channels; ++c) { for (size_t c = 0; c < channels; ++c) {
for (size_t ph = 0; ph < outputH; ++ph) { for (size_t ph = 0; ph < outputH; ++ph) {
for (size_t pw = 0; pw < outputW; ++pw) {
int hstart = ph * strideH - paddingH; int hstart = ph * strideH - paddingH;
int wstart = pw * strideW - paddingW; int hend = std::min(hstart + sizeY, imgSizeH);
int hend = std::min(hstart + sizeY, imgSizeH + paddingH);
int wend = std::min(wstart + sizeX, imgSizeW + paddingW);
int poolSize = (hend - hstart) * (wend - wstart);
hstart = std::max(hstart, 0); hstart = std::max(hstart, 0);
for (size_t pw = 0; pw < outputW; ++pw) {
int wstart = pw * strideW - paddingW;
int wend = std::min(wstart + sizeX, imgSizeW);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
hend = std::min(hend, static_cast<int>(imgSizeH)); int poolSize = (hend - hstart) * (wend - wstart);
wend = std::min(wend, static_cast<int>(imgSizeW));
CHECK(poolSize); CHECK(poolSize);
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
...@@ -2206,8 +2185,8 @@ void CpuMatrix::avgPoolBackward(Matrix& input, ...@@ -2206,8 +2185,8 @@ void CpuMatrix::avgPoolBackward(Matrix& input,
} }
} }
// offset // offset
outData += imgSizeH * imgSizeW; outData += inLength;
inData += outputH * outputW; inData += outLength;
} }
} }
} }
...@@ -2234,12 +2213,11 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, ...@@ -2234,12 +2213,11 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat,
real* outData = getData(); real* outData = getData();
real* maxPoolIdxData = maxPoolIdx.getData(); real* maxPoolIdxData = maxPoolIdx.getData();
size_t num = inputMat.getHeight(); size_t num = inputMat.getHeight();
size_t inWidth = imgSizeW; size_t inLength = imgSizeH * imgSizeW * imgSizeD;
size_t inHeight = imgSizeH; size_t outLength = outputH * outputW * outputD;
size_t inDepth = imgSizeD; CHECK(inLength == inputMat.getWidth() / channels);
CHECK(inHeight * inWidth * inDepth == inputMat.getWidth() / channels);
CHECK_EQ(num, this->getHeight()); CHECK_EQ(num, this->getHeight());
CHECK_EQ(channels * outputH * outputW * outputD, this->getWidth()); CHECK_EQ(channels * outLength, this->getWidth());
size_t outStride = getStride(); size_t outStride = getStride();
/* initialize the data_ */ /* initialize the data_ */
...@@ -2258,16 +2236,16 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, ...@@ -2258,16 +2236,16 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat,
} }
for (size_t c = 0; c < channels; ++c) { // channel by channel for (size_t c = 0; c < channels; ++c) { // channel by channel
for (size_t pd = 0; pd < outputD; ++pd) { for (size_t pd = 0; pd < outputD; ++pd) {
for (size_t ph = 0; ph < outputH; ++ph) {
for (size_t pw = 0; pw < outputW; ++pw) {
int dstart = pd * strideD - paddingD; int dstart = pd * strideD - paddingD;
int hstart = ph * strideH - paddingH; int dend = std::min(dstart + sizeZ, imgSizeD);
int wstart = pw * strideW - paddingW;
int dend = std::min(dstart + sizeZ, inDepth);
int hend = std::min(hstart + sizeY, inHeight);
int wend = std::min(wstart + sizeX, inWidth);
dstart = std::max(dstart, 0); dstart = std::max(dstart, 0);
for (size_t ph = 0; ph < outputH; ++ph) {
int hstart = ph * strideH - paddingH;
int hend = std::min(hstart + sizeY, imgSizeH);
hstart = std::max(hstart, 0); hstart = std::max(hstart, 0);
for (size_t pw = 0; pw < outputW; ++pw) {
int wstart = pw * strideW - paddingW;
int wend = std::min(wstart + sizeX, imgSizeW);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
int maxIdx = -1; int maxIdx = -1;
real maxOutData = outData[(pd * outputH + ph) * outputW + pw]; real maxOutData = outData[(pd * outputH + ph) * outputW + pw];
...@@ -2275,9 +2253,9 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, ...@@ -2275,9 +2253,9 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat,
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) { for (int w = wstart; w < wend; ++w) {
if (maxOutData < if (maxOutData <
inputData[(d * inHeight + h) * inWidth + w]) { inputData[(d * imgSizeH + h) * imgSizeW + w]) {
maxOutData = inputData[(d * inHeight + h) * inWidth + w]; maxOutData = inputData[(d * imgSizeH + h) * imgSizeW + w];
maxIdx = (d * inHeight + h) * inWidth + w; maxIdx = (d * imgSizeH + h) * imgSizeW + w;
} }
} }
} }
...@@ -2288,9 +2266,9 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, ...@@ -2288,9 +2266,9 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat,
} }
} }
// compute offset // compute offset
inputData += inDepth * inHeight * inWidth; inputData += inLength;
outData += outputD * outputH * outputW; outData += outLength;
maxPoolIdxData += outputD * outputH * outputW; maxPoolIdxData += outLength;
} }
} }
} }
...@@ -2315,7 +2293,9 @@ void CpuMatrix::maxPool3DBackward(Matrix& outGrad, ...@@ -2315,7 +2293,9 @@ void CpuMatrix::maxPool3DBackward(Matrix& outGrad,
real scaleTargets, real scaleTargets,
real scaleOutput) { real scaleOutput) {
size_t num = getHeight(); size_t num = getHeight();
size_t channels = size_t(width_ / imgSizeD / imgSizeH / imgSizeW); size_t inLength = imgSizeH * imgSizeW * imgSizeD;
size_t outLength = outputH * outputW * outputD;
size_t channels = size_t(width_ / inLength);
CHECK(maxPoolIdx.getHeight() == outGrad.getHeight() && CHECK(maxPoolIdx.getHeight() == outGrad.getHeight() &&
maxPoolIdx.getWidth() == outGrad.getWidth()); maxPoolIdx.getWidth() == outGrad.getWidth());
...@@ -2341,9 +2321,9 @@ void CpuMatrix::maxPool3DBackward(Matrix& outGrad, ...@@ -2341,9 +2321,9 @@ void CpuMatrix::maxPool3DBackward(Matrix& outGrad,
} }
} }
// offset // offset
tgtGrad += imgSizeD * imgSizeH * imgSizeW; tgtGrad += inLength;
otGrad += outputD * outputH * outputW; otGrad += outLength;
maxPoolIdxData += outputD * outputH * outputW; maxPoolIdxData += outLength;
} }
} }
} }
...@@ -2367,11 +2347,10 @@ void CpuMatrix::avgPool3DForward(Matrix& input, ...@@ -2367,11 +2347,10 @@ void CpuMatrix::avgPool3DForward(Matrix& input,
size_t paddingW) { size_t paddingW) {
// The main loop // The main loop
size_t num = input.getHeight(); size_t num = input.getHeight();
size_t inDepth = imgSizeD; size_t inLength = imgSizeH * imgSizeW * imgSizeD;
size_t inHeight = imgSizeH; size_t outLength = outputH * outputW * outputD;
size_t inWidth = imgSizeW; CHECK(inLength * channels == input.getWidth());
CHECK(inDepth * inHeight * inWidth * channels == input.getWidth()); CHECK(outLength * channels * num == height_ * width_);
CHECK(outputD * outputH * outputW * channels * num == height_ * width_);
real* tgtData = getData(); real* tgtData = getData();
real* inData = input.getData(); real* inData = input.getData();
...@@ -2381,39 +2360,36 @@ void CpuMatrix::avgPool3DForward(Matrix& input, ...@@ -2381,39 +2360,36 @@ void CpuMatrix::avgPool3DForward(Matrix& input,
} }
for (size_t c = 0; c < channels; ++c) { for (size_t c = 0; c < channels; ++c) {
for (size_t pd = 0; pd < outputD; ++pd) { for (size_t pd = 0; pd < outputD; ++pd) {
for (size_t ph = 0; ph < outputH; ++ph) {
for (size_t pw = 0; pw < outputW; ++pw) {
int dstart = pd * strideD - paddingD; int dstart = pd * strideD - paddingD;
int hstart = ph * strideH - paddingH; int dend = std::min(dstart + sizeZ, imgSizeD);
int wstart = pw * strideW - paddingW;
int dend = std::min(dstart + sizeZ, inDepth + paddingD);
int hend = std::min(hstart + sizeY, inHeight + paddingH);
int wend = std::min(wstart + sizeX, inWidth + paddingW);
int poolSize = (dend - dstart) * (hend - hstart) * (wend - wstart);
dstart = std::max(dstart, 0); dstart = std::max(dstart, 0);
for (size_t ph = 0; ph < outputH; ++ph) {
int hstart = ph * strideH - paddingH;
int hend = std::min(hstart + sizeY, imgSizeH);
hstart = std::max(hstart, 0); hstart = std::max(hstart, 0);
for (size_t pw = 0; pw < outputW; ++pw) {
int wstart = pw * strideW - paddingW;
int wend = std::min(wstart + sizeX, imgSizeW);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
dend = std::min(dend, static_cast<int>(inDepth));
hend = std::min(hend, static_cast<int>(inHeight));
wend = std::min(wend, static_cast<int>(inWidth));
CHECK(poolSize);
tgtData[(pd * outputH + ph) * outputW + pw] = 0; // clear tgtData[(pd * outputH + ph) * outputW + pw] = 0; // clear
for (int d = dstart; d < dend; ++d) { for (int d = dstart; d < dend; ++d) {
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) { for (int w = wstart; w < wend; ++w) {
tgtData[(pd * outputH + ph) * outputW + pw] += tgtData[(pd * outputH + ph) * outputW + pw] +=
inData[(d * inHeight + h) * inWidth + w]; inData[(d * imgSizeH + h) * imgSizeW + w];
} }
} }
} }
int poolSize = (dend - dstart) * (hend - hstart) * (wend - wstart);
CHECK(poolSize);
tgtData[(pd * outputH + ph) * outputW + pw] /= poolSize; tgtData[(pd * outputH + ph) * outputW + pw] /= poolSize;
} }
} }
} }
// compute offset // compute offset
inData += inDepth * inHeight * inWidth; inData += inLength;
tgtData += outputD * outputH * outputW; tgtData += outLength;
} }
} }
} }
...@@ -2437,8 +2413,10 @@ void CpuMatrix::avgPool3DBackward(Matrix& input, ...@@ -2437,8 +2413,10 @@ void CpuMatrix::avgPool3DBackward(Matrix& input,
real scaleTargets, real scaleTargets,
real scaleOutput) { real scaleOutput) {
size_t num = input.getHeight(); size_t num = input.getHeight();
size_t channels = input.getWidth() / outputD / outputH / outputW; size_t inLength = imgSizeH * imgSizeW * imgSizeD;
CHECK(imgSizeD * imgSizeH * imgSizeW * channels == getWidth()); size_t outLength = outputH * outputW * outputD;
size_t channels = input.getWidth() / outLength;
CHECK(inLength * channels == getWidth());
real* inData = input.getData(); real* inData = input.getData();
real* outData = getData(); real* outData = getData();
...@@ -2448,21 +2426,18 @@ void CpuMatrix::avgPool3DBackward(Matrix& input, ...@@ -2448,21 +2426,18 @@ void CpuMatrix::avgPool3DBackward(Matrix& input,
} }
for (size_t c = 0; c < channels; ++c) { for (size_t c = 0; c < channels; ++c) {
for (size_t pd = 0; pd < outputD; ++pd) { for (size_t pd = 0; pd < outputD; ++pd) {
for (size_t ph = 0; ph < outputH; ++ph) {
for (size_t pw = 0; pw < outputW; ++pw) {
int dstart = pd * strideD - paddingD; int dstart = pd * strideD - paddingD;
int hstart = ph * strideH - paddingH; int dend = std::min(dstart + sizeZ, imgSizeD);
int wstart = pw * strideW - paddingW;
int dend = std::min(dstart + sizeZ, imgSizeD + paddingD);
int hend = std::min(hstart + sizeY, imgSizeH + paddingH);
int wend = std::min(wstart + sizeX, imgSizeW + paddingW);
int poolSize = (dend - dstart) * (hend - hstart) * (wend - wstart);
dstart = std::max(dstart, 0); dstart = std::max(dstart, 0);
for (size_t ph = 0; ph < outputH; ++ph) {
int hstart = ph * strideH - paddingH;
int hend = std::min(hstart + sizeY, imgSizeH);
hstart = std::max(hstart, 0); hstart = std::max(hstart, 0);
for (size_t pw = 0; pw < outputW; ++pw) {
int wstart = pw * strideW - paddingW;
int wend = std::min(wstart + sizeX, imgSizeW);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
dend = std::min(dend, static_cast<int>(imgSizeD)); int poolSize = (dend - dstart) * (hend - hstart) * (wend - wstart);
hend = std::min(hend, static_cast<int>(imgSizeH));
wend = std::min(wend, static_cast<int>(imgSizeW));
CHECK(poolSize); CHECK(poolSize);
for (int d = dstart; d < dend; ++d) { for (int d = dstart; d < dend; ++d) {
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
...@@ -2476,8 +2451,8 @@ void CpuMatrix::avgPool3DBackward(Matrix& input, ...@@ -2476,8 +2451,8 @@ void CpuMatrix::avgPool3DBackward(Matrix& input,
} }
} }
// offset // offset
outData += imgSizeD * imgSizeH * imgSizeW; outData += inLength;
inData += outputD * outputH * outputW; inData += outLength;
} }
} }
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册