diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp
index 27f7d95b752d4a423bf99fa425b10b2816575d6a..66868e73b33a2c250236bc2ddde2bbb5093ea70f 100644
--- a/paddle/math/Matrix.cpp
+++ b/paddle/math/Matrix.cpp
@@ -1389,6 +1389,52 @@ void GpuMatrix::multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) {
       output_d, grad_d, mat_d, height_, width_);
 }
 
+void GpuMatrix::vol2Col(real* data,
+                        int channels,
+                        int depth,
+                        int height,
+                        int width,
+                        int filterD,
+                        int filterH,
+                        int filterW,
+                        int strideD,
+                        int strideH,
+                        int strideW,
+                        int paddingD,
+                        int paddingH,
+                        int paddingW) {
+  hl_matrix_vol2Col(data,
+                    channels, depth, height, width,
+                    filterD, filterH, filterW,
+                    strideD, strideH, strideW,
+                    paddingD, paddingH, paddingW, getData());
+}
+
+void GpuMatrix::col2Vol(real* trg,
+                        int channels,
+                        int depth,
+                        int height,
+                        int width,
+                        int filterD,
+                        int filterH,
+                        int filterW,
+                        int strideD,
+                        int strideH,
+                        int strideW,
+                        int paddingD,
+                        int paddingH,
+                        int paddingW,
+                        real alpha,
+                        real beta) {
+  hl_matrix_col2Vol(trg,
+                    channels, depth, height, width,
+                    filterD, filterH, filterW,
+                    strideD, strideH, strideW,
+                    paddingD, paddingH, paddingW,
+                    getData(),
+                    alpha, beta);
+}
+
 /**
  * CpuMatrix
  */
@@ -3975,6 +4021,95 @@ void CpuMatrix::bilinearBackward(const Matrix& out,
   }
 }
 
+void CpuMatrix::vol2Col(real* data,
+                        int channels,
+                        int depth,
+                        int height,
+                        int width,
+                        int filterD,
+                        int filterH,
+                        int filterW,
+                        int strideD,
+                        int strideH,
+                        int strideW,
+                        int paddingD,
+                        int paddingH,
+                        int paddingW) {
+  real* outData = getData();
+  int outHeight = (height + 2 * paddingH - filterH) / strideH + 1;
+  int outWidth = (width + 2 * paddingW - filterW) / strideW + 1;
+  int outDepth = (depth + 2 * paddingD - filterD) / strideD + 1;
+
+  int channelsCol = channels * filterD * filterH * filterW;
+  for (int c = 0; c < channelsCol; ++c) {
+    int wOffset = c % filterW;
+    int hOffset = (c / filterW) % filterH;
+    int dOffset = (c / filterW / filterH) % filterD;
+    int cIn = c / filterW / filterH / filterD;
+    for (int d = 0; d < outDepth; ++d) {
+      for (int h = 0; h < outHeight; ++h) {
+        for (int w = 0; w < outWidth; ++w) {
+          int dPad = d * strideD - paddingD + dOffset;
+          int hPad = h * strideH - paddingH + hOffset;
+          int wPad = w * strideW - paddingW + wOffset;
+
+          if (hPad >= 0 && hPad < height && wPad >= 0 && wPad < width &&
+              dPad >= 0 && dPad < depth)
+            outData[((c * outDepth + d) * outHeight + h) * outWidth + w] =
+                data[((cIn * depth + dPad) * height + hPad) * width + wPad];
+          else
+            outData[((c * outDepth + d) * outHeight + h) * outWidth + w] = 0;
+        }
+      }
+    }
+  }
+}
+
+void CpuMatrix::col2Vol(real* trg,
+                        int channels,
+                        int depth,
+                        int height,
+                        int width,
+                        int filterD,
+                        int filterH,
+                        int filterW,
+                        int strideD,
+                        int strideH,
+                        int strideW,
+                        int paddingD,
+                        int paddingH,
+                        int paddingW,
+                        real alpha,
+                        real beta) {
+  real* src = getData();
+  int outDepth = (depth + 2 * paddingD - filterD) / strideD + 1;
+  int outHeight = (height + 2 * paddingH - filterH) / strideH + 1;
+  int outWidth = (width + 2 * paddingW - filterW) / strideW + 1;
+  int channelsCol = channels * filterD * filterH * filterW;
+  for (int c = 0; c < channelsCol; ++c) {
+    int wOffset = c % filterW;
+    int hOffset = (c / filterW) % filterH;
+    int dOffset = (c / filterW / filterH) % filterD;
+    int cIm = c / filterW / filterH / filterD;
+    for (int d = 0; d < outDepth; ++d) {
+      for (int h = 0; h < outHeight; ++h) {
+        for (int w = 0; w < outWidth; ++w) {
+          int dPad = d * strideD - paddingD + dOffset;
+          int hPad = h * strideH - paddingH + hOffset;
+          int wPad = w * strideW - paddingW + wOffset;
+          if (hPad >= 0 && hPad < height && wPad >= 0 && wPad < width &&
+              dPad >= 0 && dPad < depth)
+            trg[((cIm * depth + dPad) * height + hPad) * width + wPad] =
+                alpha *
+                    src[((c * outDepth + d) * outHeight + h) * outWidth + w] +
+                beta *
+                    trg[((cIm * depth + dPad) * height + hPad) * width + wPad];
+        }
+      }
+    }
+  }
+}
+
 ////////////////////////////////////////////////////////////////
 // functions executed via cpu //
 ////////////////////////////////////////////////////////////////
diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h
index bb802bbb2c75289a45d987b22ad41ce8b1e95c98..4354996ce0b02daffd6211796a99acc067a19153 100644
--- a/paddle/math/Matrix.h
+++ b/paddle/math/Matrix.h
@@ -1039,6 +1039,42 @@ public:
     LOG(FATAL) << "Not implemented";
   }
 
+  virtual void vol2Col(real* data,
+                       int channels,
+                       int depth,
+                       int height,
+                       int width,
+                       int filterD,
+                       int filterH,
+                       int filterW,
+                       int strideD,
+                       int strideH,
+                       int strideW,
+                       int paddingD,
+                       int paddingH,
+                       int paddingW) {
+    LOG(FATAL) << "Not implemented";
+  }
+
+  virtual void col2Vol(real* trg,
+                       int channels,
+                       int depth,
+                       int height,
+                       int width,
+                       int filterD,
+                       int filterH,
+                       int filterW,
+                       int strideD,
+                       int strideH,
+                       int strideW,
+                       int paddingD,
+                       int paddingH,
+                       int paddingW,
+                       real alpha,
+                       real beta) {
+    LOG(FATAL) << "Not implemented";
+  }
+
   virtual void bilinearForward(const Matrix& in,
                                const size_t inImgH,
                                const size_t inImgW,
@@ -1374,6 +1410,20 @@ public:
                         const real ratioH,
                         const real ratioW);
 
+  void vol2Col(real* data,
+               int channels,
+               int depth, int height, int width,
+               int filterD, int filterH, int filterW,
+               int strideD, int strideH, int strideW,
+               int paddingD, int paddingH, int paddingW);
+
+  void col2Vol(real* trg,
+               int channels, int depth, int height, int width,
+               int filterD, int filterH, int filterW,
+               int strideD, int strideH, int strideW,
+               int paddingD, int paddingH, int paddingW,
+               real alpha, real beta);
+
   void multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label);
 
   void multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label);
@@ -1715,6 +1765,20 @@ public:
                         const real ratioH,
                         const real ratioW);
 
+  void vol2Col(real* data,
+               int channels,
+               int depth, int height, int width,
+               int filterD, int filterH, int filterW,
+               int strideD, int strideH, int strideW,
+               int paddingD, int paddingH, int paddingW);
+
+  void col2Vol(real* trg,
+               int channels, int depth, int height, int width,
+               int filterD, int filterH, int filterW,
+               int strideD, int strideH, int strideW,
+               int paddingD, int paddingH, int paddingW,
+               real alpha, real beta);
+
   template <typename ExpressionType>
   void operator=(const ExpressionType& expr) {
     TensorCpuApply<real>(*this, expr);
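
Note (not part of the patch): the short standalone C++ sketch below illustrates the shape convention the new vol2Col/col2Vol methods assume. The column buffer has channels * filterD * filterH * filterW rows and outDepth * outHeight * outWidth columns, with each output dimension computed as (size + 2 * padding - filter) / stride + 1, matching the CpuMatrix::vol2Col loops above. All names in the sketch (ColShape, colShapeFor) are illustrative helpers, not Paddle API.

#include <cstdio>

// Illustrative helper (hypothetical, not Paddle code): compute the shape of
// the column buffer that a vol2Col-style transform writes into.
struct ColShape {
  int channelsCol;                    // rows: channels * filterD * filterH * filterW
  int outDepth, outHeight, outWidth;  // columns: outDepth * outHeight * outWidth
};

ColShape colShapeFor(int channels, int depth, int height, int width,
                     int filterD, int filterH, int filterW,
                     int strideD, int strideH, int strideW,
                     int paddingD, int paddingH, int paddingW) {
  ColShape s;
  s.outDepth = (depth + 2 * paddingD - filterD) / strideD + 1;
  s.outHeight = (height + 2 * paddingH - filterH) / strideH + 1;
  s.outWidth = (width + 2 * paddingW - filterW) / strideW + 1;
  s.channelsCol = channels * filterD * filterH * filterW;
  return s;
}

int main() {
  // Example: 3 channels, an 8x8x8 volume, 3x3x3 filter, stride 1, padding 1.
  ColShape s = colShapeFor(3, 8, 8, 8, 3, 3, 3, 1, 1, 1, 1, 1, 1);
  // Expect channelsCol = 81 (3 * 27) and an 8x8x8 grid of output positions,
  // i.e. a column matrix of size 81 x 512.
  std::printf("channelsCol=%d, out=%dx%dx%d\n",
              s.channelsCol, s.outDepth, s.outHeight, s.outWidth);
  return 0;
}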