Merge remote-tracking branch 'origin/develop' into develop

2c668b4d · qnqinan · f14da1e1 · 7cb3f3d6 · 2c668b4d
隐藏空白更改
内联并排

Showing with 84 additions and 30 deletions

test/common/test_gemm.cpp test/common/test_gemm.cpp +84 -30

未找到文件。
--- a/test/common/test_gemm.cpp
+++ b/test/common/test_gemm.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#include <cstdlib>
+#include <ctime>
 #include <iostream>
 #include "../test_helper.h"
 #include "common/log.h"
@@ -20,13 +22,21 @@ limitations under the License. */
 #define a(i, j) a[(i)*lda + (j)]
 #define b(i, j) b[(i)*ldb + (j)]
+#define c(i, j) c[(i)*ldc + (j)]
 #define c1(i, j) c1[(i)*ldc + (j)]
-#define m 62
+void print_matirx(int m, int n, int ldc, float *c) {  // Prints the m x n matrix c to std::cout, one row per line; element (i, j) is read as c[i * ldc + j] via the c(i, j) macro.
-#define n 63
+  for (int i = 0; i < m; ++i) {
-#define k 74
+    std::cout << c(i, 0);  // First column has no separator. NOTE(review): read unconditionally, so n >= 1 is assumed.
+    for (int j = 1; j < n; ++j) {
+      std::cout << " | " << c(i, j);  // Remaining columns are prefixed with " | ".
+    }
+    std::cout << std::endl;
+  }
+  std::cout << std::endl;  // Blank line after the matrix.
+}
-int main() {
+int do_sgemm(int m, int n, int k, bool relu, int t1, int t2, int pr) {  // Fills a, b, scale and bias with random values, builds a naive reference in c1, calls SgemmWithBn into c, then prints how many elements match; always returns 0.
  int lda = k;
  int ldb = n;
  int ldc = n;
@@ -39,44 +49,88 @@ int main() {
      static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m * n));
  float *c1 =
      static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m * n));
+  float *scale =
+      static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m));  // m entries: one scale factor per output row.
+  float *bias =
+      static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m));  // m entries: one bias term per output row.
+  srand(unsigned(time(0)));  // Reseeds from the current time on every call.
  for (int i = 0; i < m * k; ++i) {
-    a[i] = 2;
+    a[i] = t1 + rand() % t2;  // For t2 > 0 this is an integer in [t1, t1 + t2 - 1], stored as float.
  }
  for (int i = 0; i < k * n; ++i) {
-    b[i] = 2;
+    b[i] = t1 + rand() % t2;
  }
-  for (int i = 0; i < m * n; ++i) {
+  for (int i = 0; i < m; ++i) {
-    c[i] = 2;
+    scale[i] = t1 + rand() % t2;
-    c1[i] = 2;
  }
+  for (int i = 0; i < m; ++i) {
-  auto time1 = time();
+    bias[i] = t1 + rand() % t2;
-  //  paddle_mobile::operators::math::Sgemm(m, n, k, 0.9, a, lda, b, ldb, 0.3,
-  //  c,
-  //                                        ldc);
-  auto time2 = time();
-  DLOG << "gemm cost :" << time_diff(time1, time2) << "ms\n";
-  for (int i = 0; i < m * n; ++i) {
-    std::cout << c[i] << " | ";
-    if (i % n == (n - 1)) {
-      std::cout << std::endl;
-    }
  }
-  for (int j = 0; j < n; ++j) {
-    for (int i = 0; i < m; ++i) {
+  for (int i = 0; i < m; ++i) {
-      c1(i, j) *= 0.3;
+    for (int j = 0; j < n; ++j) {
-      for (int p = 0; p < k; ++p) {
+      float r = 0;  // Accumulates the reference value for c1(i, j).
-        c1(i, j) += 0.9 * a(i, p) * b(p, j);
+      for (int p = 0; p < k; p++) {
+        r += a(i, p) * b(p, j);  // Dot product of row i of a with column j of b.
      }
+      r *= scale[i];  // Per-row scale, then per-row bias.
+      r += bias[i];
+      if (relu && (r < 0)) {  // Clamp negatives to zero only when relu is requested.
+        r = 0;
+      }
+      c1(i, j) = r;
    }
  }
-  std::cout << "正确结果对比:" << std::endl;
+  paddle_mobile::operators::math::SgemmWithBn(m, n, k, 0.9, a, lda, b, ldb, 0.3,
+                                              c, ldc, relu, scale, bias);  // NOTE(review): 0.9 and 0.3 are passed here but the reference above applies neither, and c is not initialised in the visible lines — confirm how SgemmWithBn uses these arguments.
+  int eq = 0;
+  int neq = 0;
  for (int i = 0; i < m * n; ++i) {
-    std::cout << c1[i] << " | ";
+    if (static_cast<int>(c[i]) == static_cast<int>(c1[i])) {  // Compared after truncation to int, so fractional differences are not counted.
-    if (i % n == (n - 1)) {
+      ++eq;
-      std::cout << std::endl;
+    } else {
+      ++neq;
    }
  }
+  if (pr > 0) {  // Any positive pr dumps the inputs and both result matrices.
+    std::cout << "A:" << std::endl;
+    print_matirx(m, k, lda, a);
+    std::cout << "B:" << std::endl;
+    print_matirx(k, n, ldb, b);
+    std::cout << "C:" << std::endl;
+    print_matirx(m, n, ldc, c);
+    std::cout << "C1:" << std::endl;
+    print_matirx(m, n, ldc, c1);
+  }
+  std::cout << "mnk=" << m << " " << n << " " << k << " relu=" << relu
+            << "   eq=" << eq << " neq=" << neq << std::endl;  // One summary line per call with the match and mismatch counts.
+  paddle_mobile::memory::Free(a);
+  paddle_mobile::memory::Free(b);
+  paddle_mobile::memory::Free(c);
+  paddle_mobile::memory::Free(c1);
+  paddle_mobile::memory::Free(scale);
+  paddle_mobile::memory::Free(bias);
+  return 0;  // Returned regardless of neq; mismatches are only reported on stdout.
+}
+int main() {  // Runs do_sgemm over several (m, n, k) shapes, first without and then with relu.
+  do_sgemm(9, 9, 9, true, 10, 10, 10);  // Small case; pr = 10 makes do_sgemm print A, B, C and C1.
+  do_sgemm(10, 6, 12, false, 10, 10, 0);  // t1 = 10, t2 = 10: random inputs fall in [10, 19]; pr = 0 suppresses the matrix dump.
+  do_sgemm(512, 256, 384, false, 10, 10, 0);
+  do_sgemm(1366, 768, 256, false, 10, 10, 0);
+  do_sgemm(1255, 755, 333, false, 10, 10, 0);
+  do_sgemm(555, 777, 999, false, 10, 10, 0);
+  do_sgemm(10, 6, 12, true, -4, 10, 0);  // t1 = -4: random inputs fall in [-4, 5], so negative values are possible with relu enabled.
+  do_sgemm(512, 256, 384, true, -4, 10, 0);
+  do_sgemm(1366, 768, 256, true, -4, 10, 0);
+  do_sgemm(1255, 755, 333, true, -4, 10, 0);
+  do_sgemm(555, 777, 999, true, -4, 10, 0);
  return 0;
 }