Commit 52ceeedb authored by chengduoZH

Add col2vol and vol2col CPU functions

Parent 23cf0c61
@@ -1389,6 +1389,52 @@ void GpuMatrix::multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) {
output_d, grad_d, mat_d, height_, width_);
}
void GpuMatrix::vol2Col(real* data,
                        int channels,
                        int depth,
                        int height,
                        int width,
                        int filterD,
                        int filterH,
                        int filterW,
                        int strideD,
                        int strideH,
                        int strideW,
                        int paddingD,
                        int paddingH,
                        int paddingW) {
  hl_matrix_vol2Col(data,
                    channels, depth, height, width,
                    filterD, filterH, filterW,
                    strideD, strideH, strideW,
                    paddingD, paddingH, paddingW, getData());
}
void GpuMatrix::col2Vol(real* trg,
                        int channels,
                        int depth,
                        int height,
                        int width,
                        int filterD,
                        int filterH,
                        int filterW,
                        int strideD,
                        int strideH,
                        int strideW,
                        int paddingD,
                        int paddingH,
                        int paddingW,
                        real alpha,
                        real beta) {
  hl_matrix_col2Vol(trg,
                    channels, depth, height, width,
                    filterD, filterH, filterW,
                    strideD, strideH, strideW,
                    paddingD, paddingH, paddingW,
                    getData(),
                    alpha, beta);
}
/**
* CpuMatrix
*/
@@ -3975,6 +4021,95 @@ void CpuMatrix::bilinearBackward(const Matrix& out,
}
}
void CpuMatrix::vol2Col(real* data,
                        int channels,
                        int depth,
                        int height,
                        int width,
                        int filterD,
                        int filterH,
                        int filterW,
                        int strideD,
                        int strideH,
                        int strideW,
                        int paddingD,
                        int paddingH,
                        int paddingW) {
  real* outData = getData();
  int outHeight = (height + 2 * paddingH - filterH) / strideH + 1;
  int outWidth = (width + 2 * paddingW - filterW) / strideW + 1;
  int outDepth = (depth + 2 * paddingD - filterD) / strideD + 1;
  int channelsCol = channels * filterD * filterH * filterW;
  for (int c = 0; c < channelsCol; ++c) {
    int wOffset = c % filterW;
    int hOffset = (c / filterW) % filterH;
    int dOffset = (c / filterW / filterH) % filterD;
    int cIn = c / filterW / filterH / filterD;
    for (int d = 0; d < outDepth; ++d) {
      for (int h = 0; h < outHeight; ++h) {
        for (int w = 0; w < outWidth; ++w) {
          int dPad = d * strideD - paddingD + dOffset;
          int hPad = h * strideH - paddingH + hOffset;
          int wPad = w * strideW - paddingW + wOffset;
          if (hPad >= 0 && hPad < height && wPad >= 0 && wPad < width &&
              dPad >= 0 && dPad < depth)
            outData[((c * outDepth + d) * outHeight + h) * outWidth + w] =
                data[((cIn * depth + dPad) * height + hPad) * width + wPad];
          else
            outData[((c * outDepth + d) * outHeight + h) * outWidth + w] = 0;
        }
      }
    }
  }
}
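
The routine above follows the usual im2col layout extended to 3-D: every filterD x filterH x filterW window of the channels x depth x height x width volume becomes one column, so the destination buffer has channelsCol = channels * filterD * filterH * filterW rows and outDepth * outHeight * outWidth columns. As a reading aid, here is a minimal standalone sketch of that shape bookkeeping, written in plain C++ with made-up sizes and independent of Paddle's Matrix class:

#include <cstdio>

int main() {
  int channels = 3, depth = 8, height = 8, width = 8;  // hypothetical input volume
  int filterD = 3, filterH = 3, filterW = 3;           // hypothetical 3x3x3 filter
  int strideD = 1, strideH = 1, strideW = 1;
  int paddingD = 1, paddingH = 1, paddingW = 1;
  // Same output-size formulas as vol2Col above.
  int outDepth = (depth + 2 * paddingD - filterD) / strideD + 1;    // 8
  int outHeight = (height + 2 * paddingH - filterH) / strideH + 1;  // 8
  int outWidth = (width + 2 * paddingW - filterW) / strideW + 1;    // 8
  int channelsCol = channels * filterD * filterH * filterW;         // 81
  // vol2Col fills a channelsCol x (outDepth * outHeight * outWidth) buffer
  // (81 x 512 here); a GEMM against a [numFilters x channelsCol] weight
  // matrix would then produce the 3-D convolution output.
  printf("col buffer: %d x %d\n", channelsCol, outDepth * outHeight * outWidth);
  return 0;
}
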
void CpuMatrix::col2Vol(real* trg,
                        int channels,
                        int depth,
                        int height,
                        int width,
                        int filterD,
                        int filterH,
                        int filterW,
                        int strideD,
                        int strideH,
                        int strideW,
                        int paddingD,
                        int paddingH,
                        int paddingW,
                        real alpha,
                        real beta) {
  real* src = getData();
  int outDepth = (depth + 2 * paddingD - filterD) / strideD + 1;
  int outHeight = (height + 2 * paddingH - filterH) / strideH + 1;
  int outWidth = (width + 2 * paddingW - filterW) / strideW + 1;
  int channelsCol = channels * filterD * filterH * filterW;
  for (int c = 0; c < channelsCol; ++c) {
    int wOffset = c % filterW;
    int hOffset = (c / filterW) % filterH;
    int dOffset = (c / filterW / filterH) % filterD;
    int cIm = c / filterW / filterH / filterD;
    for (int d = 0; d < outDepth; ++d) {
      for (int h = 0; h < outHeight; ++h) {
        for (int w = 0; w < outWidth; ++w) {
          int dPad = d * strideD - paddingD + dOffset;
          int hPad = h * strideH - paddingH + hOffset;
          int wPad = w * strideW - paddingW + wOffset;
          if (hPad >= 0 && hPad < height && wPad >= 0 && wPad < width &&
              dPad >= 0 && dPad < depth)
            trg[((cIm * depth + dPad) * height + hPad) * width + wPad] =
                alpha *
                    src[((c * outDepth + d) * outHeight + h) * outWidth + w] +
                beta *
                    trg[((cIm * depth + dPad) * height + hPad) * width + wPad];
        }
      }
    }
  }
}
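
col2Vol is the scatter counterpart: each column entry is written back to the voxel it was gathered from, blended with the value already there as alpha * src + beta * trg. With alpha = 1 and beta = 1 the contributions of overlapping windows accumulate (what a gradient pass would typically want), while beta = 0 simply overwrites. A tiny standalone sketch of that per-voxel blend, with made-up numbers:

#include <cstdio>

int main() {
  float trg = 0.0f;                   // one target voxel, zero-initialized
  float cols[] = {1.0f, 2.0f, 3.0f};  // column entries mapping back onto it
  float alpha = 1.0f, beta = 1.0f;
  for (float c : cols) {
    trg = alpha * c + beta * trg;     // same per-element blend as col2Vol
  }
  printf("trg = %g\n", trg);          // prints 6: overlapping windows are summed
  return 0;
}
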
////////////////////////////////////////////////////////////////
// functions executed via cpu //
////////////////////////////////////////////////////////////////
@@ -1039,6 +1039,42 @@ public:
    LOG(FATAL) << "Not implemented";
  }
  virtual void vol2Col(real* data,
                       int channels,
                       int depth,
                       int height,
                       int width,
                       int filterD,
                       int filterH,
                       int filterW,
                       int strideD,
                       int strideH,
                       int strideW,
                       int paddingD,
                       int paddingH,
                       int paddingW) {
    LOG(FATAL) << "Not implemented";
  }
  virtual void col2Vol(real* trg,
                       int channels,
                       int depth,
                       int height,
                       int width,
                       int filterD,
                       int filterH,
                       int filterW,
                       int strideD,
                       int strideH,
                       int strideW,
                       int paddingD,
                       int paddingH,
                       int paddingW,
                       real alpha,
                       real beta) {
    LOG(FATAL) << "Not implemented";
  }
virtual void bilinearForward(const Matrix& in,
const size_t inImgH,
const size_t inImgW,
@@ -1374,6 +1410,20 @@ public:
const real ratioH,
const real ratioW);
  void vol2Col(real* data,
               int channels,
               int depth, int height, int width,
               int filterD, int filterH, int filterW,
               int strideD, int strideH, int strideW,
               int paddingD, int paddingH, int paddingW);

  void col2Vol(real* trg,
               int channels, int depth, int height, int width,
               int filterD, int filterH, int filterW,
               int strideD, int strideH, int strideW,
               int paddingD, int paddingH, int paddingW,
               real alpha, real beta);
  void multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label);
  void multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label);
@@ -1715,6 +1765,20 @@ public:
const real ratioH,
const real ratioW);
  void vol2Col(real* data,
               int channels,
               int depth, int height, int width,
               int filterD, int filterH, int filterW,
               int strideD, int strideH, int strideW,
               int paddingD, int paddingH, int paddingW);

  void col2Vol(real* trg,
               int channels, int depth, int height, int width,
               int filterD, int filterH, int filterW,
               int strideD, int strideH, int strideW,
               int paddingD, int paddingH, int paddingW,
               real alpha, real beta);
  template <typename ExpressionType>
  void operator=(const ExpressionType& expr) {
    TensorCpuApply<real>(*this, expr);