diff --git a/paddle/cuda/include/hl_cnn.h b/paddle/cuda/include/hl_cnn.h index 84f1c843596d4c871959594595c577eeb916c6f9..6b56d9ec8d3daae96aaaa04ed79cb637331e2281 100644 --- a/paddle/cuda/include/hl_cnn.h +++ b/paddle/cuda/include/hl_cnn.h @@ -192,11 +192,10 @@ extern void hl_maxpool3D_forward(const int frameCnt, const int paddingH, const int paddingW, real* tgtData, + real* maxPoolIdxData, const int tgtStride); extern void hl_maxpool3D_backward(const int frameCnt, - const real* inputData, - const real* outData, const real* outGrad, const int channels, const int depth, @@ -217,6 +216,7 @@ extern void hl_maxpool3D_backward(const int frameCnt, real scaleA, real scaleB, real* targetGrad, + real* maxPoolIdxData, const int outStride); extern void hl_avgpool3D_forward(const int frameCnt, diff --git a/paddle/cuda/include/stub/hl_cnn_stub.h b/paddle/cuda/include/stub/hl_cnn_stub.h index 6750f537bfbf60309eb10761676d712c6508dff3..a76dbf0b6578de0606702ad1af227fbf6e1cd62e 100644 --- a/paddle/cuda/include/stub/hl_cnn_stub.h +++ b/paddle/cuda/include/stub/hl_cnn_stub.h @@ -106,11 +106,10 @@ inline void hl_maxpool3D_forward(const int frameCnt, const int paddingH, const int paddingW, real* tgtData, + real* maxPoolIdxData, const int tgtStride) {} inline void hl_maxpool3D_backward(const int frameCnt, - const real* inputData, - const real* outData, const real* outGrad, const int channels, const int depth, @@ -131,6 +130,7 @@ inline void hl_maxpool3D_backward(const int frameCnt, real scaleA, real scaleB, real* targetGrad, + real* maxPoolIdxData, const int outStride) {} inline void hl_avgpool3D_forward(const int frameCnt, diff --git a/paddle/cuda/src/hl_cuda_cnn.cu b/paddle/cuda/src/hl_cuda_cnn.cu index 458c347728952566a87a075d496149ec2dbeb9f0..95440c9446384009789431013725085358c107c5 100644 --- a/paddle/cuda/src/hl_cuda_cnn.cu +++ b/paddle/cuda/src/hl_cuda_cnn.cu @@ -366,10 +366,11 @@ __global__ void KeMaxPool3DForward(const int nthreads, const int strideD, const int strideH, const int strideW, - const int offsetD, - const int offsetH, - const int offsetW, + const int padD, + const int padH, + const int padW, real* tgtData, + real* maxPoolIdxData, const int tgtStride) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); index += blockDim.x * gridDim.x) { @@ -378,9 +379,9 @@ __global__ void KeMaxPool3DForward(const int nthreads, int pd = (index / pooledW / pooledH) % pooledD; int c = (index / pooledW / pooledH / pooledD) % channels; int frameNum = index / pooledW / pooledH / pooledD / channels; - int dstart = pd * strideD - offsetD; - int hstart = ph * strideH - offsetH; - int wstart = pw * strideW - offsetW; + int dstart = pd * strideD - padD; + int hstart = ph * strideH - padH; + int wstart = pw * strideW - padW; int dend = min(dstart + ksizeD, depth); int hend = min(hstart + ksizeH, height); int wend = min(wstart + ksizeW, width); @@ -388,18 +389,22 @@ __global__ void KeMaxPool3DForward(const int nthreads, hstart = max(hstart, 0); wstart = max(wstart, 0); real maxval = -FLT_MAX; + int maxIdx = -1; inputData += (frameNum * channels + c) * depth * height * width; for (int d = dstart; d < dend; ++d) { for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { - if (maxval < inputData[(d * height + h) * width + w]) + if (maxval < inputData[(d * height + h) * width + w]) { maxval = inputData[(d * height + h) * width + w]; + maxIdx = (d * height + h) * width + w; + } } } } int tgtIndex = index % (pooledW * pooledH * pooledD * channels) + frameNum * tgtStride; tgtData[tgtIndex] = maxval; + maxPoolIdxData[tgtIndex] = maxIdx; } } @@ -418,10 +423,11 @@ void hl_maxpool3D_forward(const int frameCnt, const int strideD, const int strideH, const int strideW, - const int paddingD, - const int paddingH, - const int paddingW, + const int padD, + const int padH, + const int padW, real* tgtData, + real* maxPoolIdxData, const int tgtStride) { int num_kernels = pooledD * pooledH * pooledW * channels * frameCnt; int blocks = (num_kernels + 1024 - 1) / 1024; @@ -443,17 +449,16 @@ void hl_maxpool3D_forward(const int frameCnt, strideD, strideH, strideW, - paddingD, - paddingH, - paddingW, + padD, + padH, + padW, tgtData, + maxPoolIdxData, tgtStride); CHECK_SYNC("hl_maxpool3D_forward failed"); } __global__ void KeMaxPool3DBackward(const int nthreads, - const real* inputData, - const real* outData, const real* outGrad, const int channels, const int depth, @@ -474,33 +479,35 @@ __global__ void KeMaxPool3DBackward(const int nthreads, real scaleA, real scaleB, real* targetGrad, + real* maxPoolIdxData, const int outStride) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < (nthreads); index += blockDim.x * gridDim.x) { - // find out the local index - // find out the local offset - int offsetW = index % width + padW; - int offsetH = (index / width) % height + padH; - int offsetD = (index / width / height) % depth + padD; + int offsetW = index % width; + int offsetH = (index / width) % height; + int offsetD = (index / width / height) % depth; int offsetC = (index / width / height / depth) % channels; int frameNum = index / width / height / depth / channels; - int pdstart = (offsetD < sizeZ) ? 0 : (offsetD - sizeZ) / strideD + 1; - int phstart = (offsetH < sizeY) ? 0 : (offsetH - sizeY) / strideH + 1; - int pwstart = (offsetW < sizeX) ? 0 : (offsetW - sizeX) / strideW + 1; - int pdend = min(offsetD / strideD + 1, pooledD); - int phend = min(offsetH / strideH + 1, pooledH); - int pwend = min(offsetW / strideW + 1, pooledW); + int pdstart = + (offsetD + padD < sizeZ) ? 0 : (offsetD + padD - sizeZ) / strideD + 1; + int phstart = + (offsetH + padH < sizeY) ? 0 : (offsetH + padH - sizeY) / strideH + 1; + int pwstart = + (offsetW + padW < sizeX) ? 0 : (offsetW + padW - sizeX) / strideW + 1; + int pdend = min((offsetD + padD) / strideD + 1, pooledD); + int phend = min((offsetH + padH) / strideH + 1, pooledH); + int pwend = min((offsetW + padW) / strideW + 1, pooledW); real gradient = 0; - real input = inputData[index]; - - outData += ((frameNum * channels + offsetC) * pooledD * pooledH * pooledW); outGrad += ((frameNum * channels + offsetC) * pooledD * pooledH * pooledW); + maxPoolIdxData += + ((frameNum * channels + offsetC) * pooledD * pooledH * pooledW); for (int pd = pdstart; pd < pdend; ++pd) { for (int ph = phstart; ph < phend; ++ph) { for (int pw = pwstart; pw < pwend; ++pw) { - if (input == outData[(pd * pooledH + ph) * pooledW + pw]) + if (((offsetD * height + offsetH) * width + offsetW) == + maxPoolIdxData[(pd * pooledH + ph) * pooledW + pw]) gradient += outGrad[(pd * pooledH + ph) * pooledW + pw]; } } @@ -510,8 +517,6 @@ __global__ void KeMaxPool3DBackward(const int nthreads, } void hl_maxpool3D_backward(const int frameCnt, - const real* inputData, - const real* outData, const real* outGrad, const int channels, const int depth, @@ -532,13 +537,12 @@ void hl_maxpool3D_backward(const int frameCnt, real scaleA, real scaleB, real* targetGrad, + real* maxPoolIdxData, const int outStride) { int num_kernels = depth * height * width * channels * frameCnt; int blocks = (num_kernels + 1024 - 1) / 1024; KeMaxPool3DBackward<<>>(num_kernels, - inputData, - outData, outGrad, channels, depth, @@ -559,6 +563,7 @@ void hl_maxpool3D_backward(const int frameCnt, scaleA, scaleB, targetGrad, + maxPoolIdxData, outStride); CHECK_SYNC("hl_maxpool3D_backward"); } diff --git a/paddle/gserver/layers/Pool3DLayer.cpp b/paddle/gserver/layers/Pool3DLayer.cpp index 40a913ebfc6681bae1ac25edf39776b6dd781e11..199f21adb1a5923b590e4f0e716fc67effb2a2d1 100644 --- a/paddle/gserver/layers/Pool3DLayer.cpp +++ b/paddle/gserver/layers/Pool3DLayer.cpp @@ -72,9 +72,10 @@ size_t Pool3DLayer::getSize() { void Pool3DLayer::forward(PassType passType) { Layer::forward(passType); const MatrixPtr& inMat = inputLayers_[0]->getOutputValue(); - int batchSize = inMat->getHeight(); - int outWidth = getSize(); + size_t batchSize = inMat->getHeight(); + size_t outWidth = getSize(); resetOutput(batchSize, outWidth); + Matrix::resizeOrCreate(maxPoolIdx_, batchSize, outWidth, false, useGpu_); const MatrixPtr outMat = getOutputValue(); if (poolType_ == "avg") { @@ -97,6 +98,7 @@ void Pool3DLayer::forward(PassType passType) { paddingW_); } else if (poolType_ == "max") { outMat->maxPool3DForward(*inMat, + *maxPoolIdx_, channels_, imgSizeD_, imgSizeH_, @@ -149,9 +151,8 @@ void Pool3DLayer::backward(const UpdateCallback& callback) { 1.0, 1.0); } else if (poolType_ == "max") { - inGradMat->maxPool3DBackward(*inMat, - *outGradMat, - *outMat, + inGradMat->maxPool3DBackward(*outGradMat, + *maxPoolIdx_, imgSizeD_, imgSizeH_, imgSizeW_, diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 4f9216896c0ebd9357ce744f57057686bb6d2a32..54c2eae475047852c3303c4f9a626975a5f9b384 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -1191,6 +1191,7 @@ void GpuMatrix::avgPoolBackward(Matrix& outGrad, } void GpuMatrix::maxPool3DForward(Matrix& inputMat, + Matrix& maxPoolIdx, size_t channels, size_t imgSizeD, size_t imgSizeH, @@ -1210,6 +1211,7 @@ void GpuMatrix::maxPool3DForward(Matrix& inputMat, CHECK(inputMat.useGpu_) << "Matrix type are not correct"; real* inputData = inputMat.getData(); + real* maxPoolIdxData = maxPoolIdx.getData(); size_t num = inputMat.getHeight(); size_t width = imgSizeW; size_t height = imgSizeH; @@ -1237,12 +1239,12 @@ void GpuMatrix::maxPool3DForward(Matrix& inputMat, paddingH, paddingW, getData(), + maxPoolIdxData, getStride()); } -void GpuMatrix::maxPool3DBackward(Matrix& inputMat, - Matrix& outGrad, - Matrix& outV, +void GpuMatrix::maxPool3DBackward(Matrix& outGrad, + Matrix& maxPoolIdx, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, @@ -1260,26 +1262,21 @@ void GpuMatrix::maxPool3DBackward(Matrix& inputMat, size_t paddingW, real scaleTargets, real scaleOutput) { - CHECK(inputMat.useGpu_ && outGrad.useGpu_ && outV.useGpu_) - << "Matrix type are not equal"; + CHECK(outGrad.useGpu_ && maxPoolIdx.useGpu_) << "Matrix type are not equal"; - real* inputData = inputMat.getData(); - real* outData = outV.getData(); real* outDiff = outGrad.getData(); - size_t frameNum = inputMat.getHeight(); - size_t channels = outV.getWidth() / outputD / outputH / outputW; + real* maxPoolIdxData = maxPoolIdx.getData(); + size_t frameNum = getHeight(); + size_t channels = outGrad.getWidth() / outputD / outputH / outputW; size_t width = imgSizeW; size_t height = imgSizeH; size_t depth = imgSizeD; - CHECK(depth * height * width * channels == inputMat.getWidth()); - CHECK(height_ == inputMat.getHeight()); + CHECK(depth * height * width * channels == getWidth()); CHECK(width_ == depth * width * height * channels); - CHECK(outGrad.getHeight() == outV.getHeight() && - outGrad.getWidth() == outV.getWidth()); + CHECK(outGrad.getHeight() == maxPoolIdx.getHeight() && + outGrad.getWidth() == maxPoolIdx.getWidth()); hl_maxpool3D_backward(frameNum, - inputData, - outData, outDiff, channels, depth, @@ -1300,6 +1297,7 @@ void GpuMatrix::maxPool3DBackward(Matrix& inputMat, scaleTargets, scaleOutput, getData(), + maxPoolIdxData, outGrad.getStride()); } @@ -2148,6 +2146,7 @@ void CpuMatrix::avgPoolBackward(Matrix& input, } void CpuMatrix::maxPool3DForward(Matrix& inputMat, + Matrix& maxPoolIdx, size_t channels, size_t imgSizeD, size_t imgSizeH, @@ -2166,6 +2165,7 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, size_t paddingW) { real* inputData = inputMat.getData(); real* outData = getData(); + real* maxPoolIdxData = maxPoolIdx.getData(); size_t num = inputMat.getHeight(); size_t inWidth = imgSizeW; size_t inHeight = imgSizeH; @@ -2179,6 +2179,7 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, for (size_t i = 0; i < height_; i++) { for (size_t j = 0; j < width_; j++) { outData[(i)*outStride + j] = -(real)FLT_MAX; + maxPoolIdxData[(i)*outStride + j] = -1; } } @@ -2186,6 +2187,7 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, for (size_t n = 0; n < num; ++n) { // frame by frame if (!isContiguous()) { outData = getData() + n * outStride; + maxPoolIdxData = maxPoolIdx.getData() + n * outStride; } for (size_t c = 0; c < channels; ++c) { // channel by channel for (size_t pd = 0; pd < outputD; ++pd) { @@ -2200,6 +2202,7 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, dstart = std::max(dstart, 0); hstart = std::max(hstart, 0); wstart = std::max(wstart, 0); + int maxIdx = -1; real maxOutData = outData[(pd * outputH + ph) * outputW + pw]; for (int d = dstart; d < dend; ++d) { for (int h = hstart; h < hend; ++h) { @@ -2207,24 +2210,26 @@ void CpuMatrix::maxPool3DForward(Matrix& inputMat, if (maxOutData < inputData[(d * inHeight + h) * inWidth + w]) { maxOutData = inputData[(d * inHeight + h) * inWidth + w]; + maxIdx = (d * inHeight + h) * inWidth + w; } } } } outData[(pd * outputH + ph) * outputW + pw] = maxOutData; + maxPoolIdxData[(pd * outputH + ph) * outputW + pw] = maxIdx; } } } // compute offset inputData += inDepth * inHeight * inWidth; outData += outputD * outputH * outputW; + maxPoolIdxData += outputD * outputH * outputW; } } } -void CpuMatrix::maxPool3DBackward(Matrix& image, - Matrix& outGrad, - Matrix& outV, +void CpuMatrix::maxPool3DBackward(Matrix& outGrad, + Matrix& maxPoolIdx, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, @@ -2242,59 +2247,38 @@ void CpuMatrix::maxPool3DBackward(Matrix& image, size_t paddingW, real scaleTargets, real scaleOutput) { - size_t num = image.getHeight(); + size_t num = getHeight(); size_t channels = size_t(width_ / imgSizeD / imgSizeH / imgSizeW); - CHECK(image.getWidth() == imgSizeD * imgSizeH * imgSizeW * channels); - CHECK(image.getHeight() == height_ && image.getWidth() == width_); - CHECK(outV.getHeight() == outGrad.getHeight() && - outV.getWidth() == outGrad.getWidth()); + CHECK(maxPoolIdx.getHeight() == outGrad.getHeight() && + maxPoolIdx.getWidth() == outGrad.getWidth()); real* tgtGrad = getData(); - real* inData = image.getData(); - real* otData = outV.getData(); real* otGrad = outGrad.getData(); + real* maxPoolIdxData = maxPoolIdx.getData(); - size_t outStride = outV.getStride(); + size_t outStride = outGrad.getStride(); ; for (size_t n = 0; n < num; ++n) { - if (!outV.isContiguous()) { - otData = outV.getData() + n * outStride; + if (!outGrad.isContiguous()) { otGrad = outGrad.getData() + n * outStride; + maxPoolIdxData = maxPoolIdx.getData() + n * outStride; } for (size_t c = 0; c < channels; ++c) { for (size_t pd = 0; pd < outputD; ++pd) { for (size_t ph = 0; ph < outputH; ++ph) { for (size_t pw = 0; pw < outputW; ++pw) { - int dstart = pd * strideD - paddingD; - int hstart = ph * strideH - paddingH; - int wstart = pw * strideW - paddingW; - int dend = std::min(dstart + sizeZ, imgSizeD); - int hend = std::min(hstart + sizeY, imgSizeH); - int wend = std::min(wstart + sizeX, imgSizeW); - dstart = std::max(dstart, 0); - hstart = std::max(hstart, 0); - wstart = std::max(wstart, 0); - for (int d = dstart; d < dend; ++d) { - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - tgtGrad[(d * imgSizeH + h) * imgSizeW + w] = - scaleTargets * - tgtGrad[(d * imgSizeH + h) * imgSizeW + w] + - scaleOutput * otGrad[(pd * outputH + ph) * outputW + pw] * - (inData[(d * imgSizeH + h) * imgSizeW + w] == - otData[(pd * outputH + ph) * outputW + pw]); - } - } - } + const size_t index = (pd * outputH + ph) * outputW + pw; + const size_t tgtIdx = static_cast(maxPoolIdxData[index]); + tgtGrad[tgtIdx] = + scaleTargets * tgtGrad[tgtIdx] + scaleOutput * otGrad[index]; } } } // offset - inData += imgSizeD * imgSizeH * imgSizeW; tgtGrad += imgSizeD * imgSizeH * imgSizeW; - otData += outputD * outputH * outputW; otGrad += outputD * outputH * outputW; + maxPoolIdxData += outputD * outputH * outputW; } } } diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index dec9702433cdcc3f4cc237ebf382960a166d5968..e674c1e9abef2e27efb81f894d101ddd573b656f 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -933,6 +933,7 @@ public: * in the sizeX of value */ virtual void maxPool3DForward(Matrix& inputMat, + Matrix& maxPoolIdx, size_t channels, size_t imgSizeD, size_t imgSizeH, @@ -952,9 +953,8 @@ public: LOG(FATAL) << "Not implemeted"; } - virtual void maxPool3DBackward(Matrix& image, - Matrix& outGrad, - Matrix& outV, + virtual void maxPool3DBackward(Matrix& outGrad, + Matrix& maxPoolIdx, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, @@ -1436,6 +1436,7 @@ public: size_t paddingW); void maxPool3DForward(Matrix& inputMat, + Matrix& maxPoolIdx, size_t channels, size_t imgSizeD, size_t imgSizeH, @@ -1453,9 +1454,8 @@ public: size_t paddingH, size_t paddingW); - void maxPool3DBackward(Matrix& image, - Matrix& outGrad, - Matrix& outV, + void maxPool3DBackward(Matrix& outGrad, + Matrix& maxPoolIdx, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, @@ -1671,6 +1671,7 @@ public: size_t paddingW); void maxPool3DForward(Matrix& inputMat, + Matrix& maxPoolIdx, size_t channels, size_t imgSizeD, size_t imgSizeH, @@ -1688,9 +1689,8 @@ public: size_t paddingH, size_t paddingW); - void maxPool3DBackward(Matrix& image, - Matrix& outGrad, - Matrix& outV, + void maxPool3DBackward(Matrix& outGrad, + Matrix& maxPoolIdx, size_t imgSizeD, size_t imgSizeH, size_t imgSizeW, diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp index 21ee8543cd2ea7c635491f5a5cf5ea35250f73be..d7ad6f18ac0aa83f5769994f8a2304ae0c70540d 100644 --- a/paddle/math/tests/test_matrixCompare.cpp +++ b/paddle/math/tests/test_matrixCompare.cpp @@ -1204,202 +1204,398 @@ TEST(Matrix, warpCTC) { } } -void testMatrixPool3D(int depth, int height, int width) { - int channel = 3; - int filterX = 3, filterY = 4, filterZ = 5; - int strideX = 2, strideY = 2, strideZ = 2; - int padX = 1, padY = 1, padZ = 1; - - MatrixPtr cpuImage = - std::make_shared(1, channel * depth * height * width); - MatrixPtr gpuImage = - std::make_shared(1, channel * depth * height * width); - - int outD = outputSize(depth, filterZ, padZ, strideZ, true); - int outH = outputSize(height, filterY, padZ, strideY, true); - int outW = outputSize(width, filterX, padZ, strideX, true); - - int colBufWidth = outD * outH * outW; - MatrixPtr cpuOutput = std::make_shared(1, channel * colBufWidth); - MatrixPtr gpuOutput = std::make_shared(1, channel * colBufWidth); - - cpuImage->randomizeUniform(); - gpuImage->copyFrom(*cpuImage); - cpuOutput->maxPool3DForward(*cpuImage, - channel, - depth, - height, - width, - outD, - outH, - outW, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX); - gpuOutput->maxPool3DForward(*gpuImage, - channel, - depth, - height, - width, - outD, - outH, - outW, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX); - TensorCheckErr(*cpuOutput, *gpuOutput); +void testMaxPool3DFwdBwd(int numSamples, + int channels, + int imgSizeD, + int imgSizeH, + int imgSizeW, + int ksizeD, + int ksizeH, + int ksizeW, + int strideD, + int strideH, + int strideW, + int padD, + int padH, + int padW) { + int outD = outputSize(imgSizeD, ksizeD, padD, strideD, true); + int outH = outputSize(imgSizeH, ksizeH, padH, strideH, true); + int outW = outputSize(imgSizeW, ksizeW, padW, strideW, true); + + int inWidth = channels * imgSizeD * imgSizeH * imgSizeW; + MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false); + MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true); - cpuImage->randomizeUniform(); - gpuImage->copyFrom(*cpuImage); - cpuOutput->avgPool3DForward(*cpuImage, - channel, - depth, - height, - width, - outD, - outH, - outW, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX); - - gpuOutput->avgPool3DForward(*gpuImage, - channel, - depth, - height, - width, - outD, - outH, - outW, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX); - TensorCheckErr(*cpuOutput, *gpuOutput); - cpuImage->randomizeUniform(); - gpuImage->copyFrom(*cpuImage); - cpuOutput->randomizeUniform(); - gpuOutput->copyFrom(*cpuOutput); - cpuImage->avgPool3DBackward(*cpuOutput, - depth, - height, - width, - outD, - outH, - outW, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX, - 1.0, - 1.0); - - gpuImage->avgPool3DBackward(*gpuOutput, - depth, - height, - width, - outD, - outH, - outW, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX, - 1.0, - 1.0); - TensorCheckErr(*cpuImage, *gpuImage); - - cpuImage->randomizeUniform(); - gpuImage->copyFrom(*cpuImage); - cpuOutput->randomizeUniform(); - gpuOutput->copyFrom(*cpuOutput); - cpuImage->maxPool3DBackward(*cpuImage, - *cpuOutput, - *cpuOutput, - depth, - height, - width, + int outWidth = channels * outD * outH * outW; + MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false); + MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true); + MatrixPtr maxIdx = CpuMatrix::create(numSamples, outWidth, false, false); + MatrixPtr maxIdxGpu = GpuMatrix::create(numSamples, outWidth, false, true); + + input->randomizeUniform(); + target->randomizeUniform(); + inputGpu->copyFrom(*input); + targetGpu->copyFrom(*target); + + target->maxPool3DForward(*input, + *maxIdx, + channels, + imgSizeD, + imgSizeH, + imgSizeW, + outD, + outH, + outW, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW); + targetGpu->maxPool3DForward(*inputGpu, + *maxIdxGpu, + channels, + imgSizeD, + imgSizeH, + imgSizeW, outD, outH, outW, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX, - 1.0, - 1.0); - - gpuImage->maxPool3DBackward(*gpuImage, - *gpuOutput, - *gpuOutput, - depth, - height, - width, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW); + MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false); + targetCheck->copyFrom(*targetGpu); + checkMatrixEqual(target, targetCheck); + + MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false); + MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true); + MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false); + MatrixPtr targetGpuGrad = + GpuMatrix::create(numSamples, outWidth, false, true); + + inputGrad->randomizeUniform(); + targetGrad->randomizeUniform(); + inputGpuGrad->copyFrom(*inputGrad); + targetGpuGrad->copyFrom(*targetGrad); + + inputGrad->maxPool3DBackward(*targetGrad, + *maxIdx, + imgSizeD, + imgSizeH, + imgSizeW, + outD, + outH, + outW, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW, + 1.0, + 1.0); + inputGpuGrad->maxPool3DBackward(*targetGpuGrad, + *maxIdxGpu, + imgSizeD, + imgSizeH, + imgSizeW, + outD, + outH, + outW, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW, + 1.0, + 1.0); + MatrixPtr targetBwdCheck = + CpuMatrix::create(numSamples, inWidth, false, false); + targetBwdCheck->copyFrom(*inputGpuGrad); + checkMatrixEqual(inputGrad, targetBwdCheck); +} + +void testAvgPool3DFwdBwd(int numSamples, + int channels, + int imgSizeD, + int imgSizeH, + int imgSizeW, + int ksizeD, + int ksizeH, + int ksizeW, + int strideD, + int strideH, + int strideW, + int padD, + int padH, + int padW) { + int outD = outputSize(imgSizeD, ksizeD, padD, strideD, true); + int outH = outputSize(imgSizeH, ksizeH, padH, strideH, true); + int outW = outputSize(imgSizeW, ksizeW, padW, strideW, true); + + int inWidth = imgSizeD * imgSizeH * imgSizeW * channels; + MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false); + MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true); + + int outWidth = channels * outD * outH * outW; + MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false); + MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true); + + input->randomizeUniform(); + target->randomizeUniform(); + inputGpu->copyFrom(*input); + targetGpu->copyFrom(*target); + + target->avgPool3DForward(*input, + channels, + imgSizeD, + imgSizeH, + imgSizeW, + outD, + outH, + outW, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW); + + targetGpu->avgPool3DForward(*inputGpu, + channels, + imgSizeD, + imgSizeH, + imgSizeW, outD, outH, outW, - filterZ, - filterY, - filterX, - strideZ, - strideY, - strideX, - padZ, - padY, - padX, - 1.0, - 1.0); - TensorCheckErr(*cpuImage, *gpuImage); + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW); + + TensorCheckErr(*target, *targetGpu); + + MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false); + MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true); + MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false); + MatrixPtr targetGpuGrad = + GpuMatrix::create(numSamples, outWidth, false, true); + + inputGrad->randomizeUniform(); + targetGrad->randomizeUniform(); + inputGpuGrad->copyFrom(*inputGrad); + targetGpuGrad->copyFrom(*targetGrad); + + inputGrad->avgPool3DBackward(*targetGrad, + imgSizeD, + imgSizeH, + imgSizeW, + outD, + outH, + outW, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW, + 1.0, + 1.0); + + inputGpuGrad->avgPool3DBackward(*targetGpuGrad, + imgSizeD, + imgSizeH, + imgSizeW, + outD, + outH, + outW, + ksizeD, + ksizeH, + ksizeW, + strideD, + strideH, + strideW, + padD, + padH, + padW, + 1.0, + 1.0); + TensorCheckErr(*inputGrad, *inputGpuGrad); } -TEST(Matrix, Pool3D) { - for (auto depth : {9, 16, 64}) { - for (auto height : {9, 11, 128}) { - for (auto width : {9, 32, 128}) { - VLOG(3) << "depth=" << depth << " height=" << height - << " width=" << width; - testMatrixPool3D(depth, height, width); +// TODO(yi): I noticed many such blindly combinatorial tests in this +// file. They are no help to locate defects at all. +TEST(Matrix, Pool3DFwdBwd) { + for (auto numSamples : {1, 3}) { + for (auto channels : {3}) { + for (auto imgSizeD : {9, 16}) { + for (auto imgSizeH : {9, 32}) { + for (auto imgSizeW : {9, 32}) { + for (auto sizeX : {3}) { + for (auto sizeY : {3}) { + for (auto sizeZ : {3}) { + for (auto sD : {2}) { + for (auto sH : {2}) { + for (auto sW : {2}) { + for (auto pD : {0, (sizeZ - 1) / 2}) { + for (auto pH : {0, (sizeY - 1) / 2}) { + for (auto pW : {0, (sizeX - 1) / 2}) { + VLOG(3) << " numSamples=" << numSamples + << " channels=" << channels + << " imgSizeD=" << imgSizeD + << " imgSizeH=" << imgSizeH + << " imgSizeW=" << imgSizeW + << " sizeX=" << sizeX + << " sizeY=" << sizeY + << " sizeZ=" << sizeZ << " strideD=" << sD + << " strideH=" << sH << " strideW=" << sW + << " padingD=" << pD << " padingH=" << pH + << " padingW=" << pW; + + testMaxPool3DFwdBwd(numSamples, + channels, + imgSizeD, + imgSizeH, + imgSizeW, + sizeX, + sizeY, + sizeZ, + sD, + sH, + sW, + pD, + pH, + pW); + testAvgPool3DFwdBwd(numSamples, + channels, + imgSizeD, + imgSizeH, + imgSizeW, + sizeX, + sizeY, + sizeZ, + sD, + sH, + sW, + pD, + pH, + pW); + } + } + } + } + } + } + } + } + } + } + } } } } + + // + // for (auto numSamples : {1, 3}) { + // for (auto channels : {1, 3}) { + // for (auto imgSizeD : {9,16}) { + // for (auto imgSizeH : {9, 32}) { + // for (auto imgSizeW : {9, 32}) { + // for (auto sizeX : {2, 3}) { + // for (auto sizeY : {2, 3}) { + // for (auto sizeZ : {2,3}){ + // for (auto sD : {1, 2}) { + // for (auto sH : {1, 2}) { + // for (auto sW : {1, 2}) { + // for (auto pD : {0, (sizeZ - 1) / 2}){ + // for (auto pH : {0, (sizeY - 1) / 2}) { + // for (auto pW : {0, (sizeX - 1) / 2}) { + // VLOG(3) << " numSamples=" << numSamples + // << " channels=" << channels + // << " imgSizeD=" << imgSizeD + // << " imgSizeH=" << imgSizeH + // << " imgSizeW=" << imgSizeW + // << " sizeX=" << sizeX + // << " sizeY=" << sizeY + // << " sizeZ=" << sizeZ + // << " strideD=" << sD + // << " strideH=" << sH + // << " strideW=" << sW + // << " padingD=" << pD + // << " padingH=" << pH + // << " padingW=" << pW; + // + // testMaxPool3DFwdBwd(numSamples, + // channels, + // imgSizeD, + // imgSizeH, + // imgSizeW, + // sizeX, + // sizeY, + // sizeZ, + // sD, + // sH, + // sW, + // pD, + // pH, + // pW); + // testAvgPool3DFwdBwd(numSamples, + // channels, + // imgSizeD, + // imgSizeH, + // imgSizeW, + // sizeX, + // sizeY, + // sizeZ, + // sD, + // sH, + // sW, + // pD, + // pH, + // pW); + // } + // } + // } + // } + // } + // } + // } + // } + // } + // } + // } + // } + // } + // } } #endif