From b19e1a1b567a68cba48486b1cd48cd4e67dd2732 Mon Sep 17 00:00:00 2001
From: Leo Zhao <48052473+LeoZhao-Intel@users.noreply.github.com>
Date: Sun, 24 Nov 2019 21:50:00 +0800
Subject: [PATCH] use prefetch to load next mem into cache (#21206)

* use prefetch to load next mem into cache

test=develop

* remove hard-coded memcpy in pyramid_hash_ff

test=develop
---
 paddle/fluid/operators/pyramid_hash_op.cc | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)
diff --git a/paddle/fluid/operators/pyramid_hash_op.cc b/paddle/fluid/operators/pyramid_hash_op.cc
index bb1abe3a89..b47d2191f6 100644
--- a/paddle/fluid/operators/pyramid_hash_op.cc
+++ b/paddle/fluid/operators/pyramid_hash_op.cc
@@ -161,14 +161,21 @@ class CPUPyramidHashOPKernel : public framework::OpKernel<T> {
   void hash_embedding_ff(const T* hash_id, int len, T* top_pos,
                          const T* weights, int _num_emb, int _rand_len,
                          int _space_len) const {
+    unsigned int pos1 = XXH32(hash_id, len * sizeof(T), 0) % _space_len;
+    unsigned int pos2 = XXH32(hash_id, len * sizeof(T), _rand_len) % _space_len;
+
     for (unsigned int j = 0; j != _num_emb; j += _rand_len) {
-      unsigned int pos = XXH32(hash_id, len * sizeof(T), j) % _space_len;
-      if (_rand_len == 16) {
-        memcpy(top_pos + j, const_cast<float*>(weights + pos), 16 * sizeof(T));
-      } else {
-        memcpy(top_pos + j, const_cast<float*>(weights + pos),
-               _rand_len * sizeof(T));
+      if (j + _rand_len < _num_emb) {
+        __builtin_prefetch(weights + pos2);
+        __builtin_prefetch(top_pos + j + _rand_len);
       }
+
+      unsigned int pos3 =
+          XXH32(hash_id, len * sizeof(T), j + 2 * _rand_len) % _space_len;
+      memcpy(top_pos + j, const_cast<float*>(weights + pos1),
+             _rand_len * sizeof(T));
+      pos1 = pos2;
+      pos2 = pos3;
     }
   }
 
-- 
GitLab