diff --git a/cache/clock_cache.h b/cache/clock_cache.h
index 75a7b43a3fdaa64f56b3cf4dd6d93db14dcf1fa4..7a1caa023f3e74fdb3178350f0799c869c56711c 100644
--- a/cache/clock_cache.h
+++ b/cache/clock_cache.h
@@ -24,6 +24,7 @@
 #include "rocksdb/cache.h"
 #include "rocksdb/secondary_cache.h"
 #include "util/autovector.h"
+#include "util/math.h"
 
 namespace ROCKSDB_NAMESPACE {
 
@@ -563,7 +564,7 @@ class HyperClockTable : public BaseClockTable {
  private:  // functions
   // Returns x mod 2^{length_bits_}.
   inline size_t ModTableSize(uint64_t x) {
-    return static_cast<size_t>(x) & length_bits_mask_;
+    return BitwiseAnd(x, length_bits_mask_);
   }
 
   // Returns the first slot in the probe sequence with a handle e such that
diff --git a/cache/sharded_cache.cc b/cache/sharded_cache.cc
index cb8555b35a38c2c1a6a159726d2c5450fba25f1d..322b59226e039214785ca97a86613fe81a3585ab 100644
--- a/cache/sharded_cache.cc
+++ b/cache/sharded_cache.cc
@@ -38,7 +38,7 @@ uint32_t DetermineSeed(int32_t hash_seed_option) {
       return GetSliceHash(hostname) & kSeedMask;
     } else {
       // Fall back on something stable within the process.
-      return static_cast<uint32_t>(gen.GetBaseUpper()) & kSeedMask;
+      return BitwiseAnd(gen.GetBaseUpper(), kSeedMask);
     }
   } else {
     // for kQuasiRandomHashSeed and fallback
diff --git a/util/core_local.h b/util/core_local.h
index 25174aef84c63cc1e954b91b04095a64a9941db7..9c5b3f2815ea7160ffe421f46bc046253bd1153c 100644
--- a/util/core_local.h
+++ b/util/core_local.h
@@ -13,6 +13,7 @@
 
 #include "port/likely.h"
 #include "port/port.h"
+#include "util/math.h"
 #include "util/random.h"
 
 namespace ROCKSDB_NAMESPACE {
@@ -70,7 +71,7 @@ std::pair<T*, size_t> CoreLocalArray<T>::AccessElementAndIndex() const {
     // cpu id unavailable, just pick randomly
     core_idx = Random::GetTLSInstance()->Uniform(1 << size_shift_);
   } else {
-    core_idx = static_cast<size_t>(cpuid & ((1 << size_shift_) - 1));
+    core_idx = static_cast<size_t>(BottomNBits(cpuid, size_shift_));
   }
   return {AccessAtCore(core_idx), core_idx};
 }
diff --git a/util/hash_test.cc b/util/hash_test.cc
index 72112b0448136d440b45c297353ca1c700367857..ccc283a24376ab414cd53487bcb7af689e8c49be 100644
--- a/util/hash_test.cc
+++ b/util/hash_test.cc
@@ -565,6 +565,8 @@ size_t FastRange64(uint64_t hash, size_t range) {
 // Tests for math.h / math128.h (not worth a separate test binary)
 using ROCKSDB_NAMESPACE::BitParity;
 using ROCKSDB_NAMESPACE::BitsSetToOne;
+using ROCKSDB_NAMESPACE::BitwiseAnd;
+using ROCKSDB_NAMESPACE::BottomNBits;
 using ROCKSDB_NAMESPACE::ConstexprFloorLog2;
 using ROCKSDB_NAMESPACE::CountTrailingZeroBits;
 using ROCKSDB_NAMESPACE::DecodeFixed128;
@@ -580,6 +582,19 @@ using ROCKSDB_NAMESPACE::Upper64of128;
 
 int blah(int x) { return DownwardInvolution(x); }
 
+template <typename T1, typename T2>
+static void test_BitwiseAnd(T1 v1, T2 v2) {
+  auto a = BitwiseAnd(v1, v2);
+  // Essentially repeating the implementation :-/
+  if constexpr (sizeof(T1) < sizeof(T2)) {
+    static_assert(std::is_same_v<decltype(a), T1>);
+    EXPECT_EQ(a, static_cast<T1>(v1 & v2));
+  } else {
+    static_assert(std::is_same_v<decltype(a), T2>);
+    EXPECT_EQ(a, static_cast<T2>(v1 & v2));
+  }
+}
+
 template <typename T>
 static void test_BitOps() {
   // This complex code is to generalize
to 128-bit values. Otherwise
@@ -598,6 +613,22 @@ static void test_BitOps() {
     // If we could directly use arithmetic:
     // T vm1 = static_cast<T>(v - 1);
 
+    // BottomNBits
+    {
+      // An essentially full length value
+      T x = everyOtherBit;
+      if (i > 2) {
+        // Make it slightly irregular
+        x = x ^ (T{1} << (i / 2));
+      }
+      auto a = BottomNBits(x, i);
+      auto b = BottomNBits(~x, i);
+      EXPECT_EQ(x | a, x);
+      EXPECT_EQ(a | b, vm1);
+      EXPECT_EQ(a & b, T{0});
+      EXPECT_EQ(BottomNBits(x ^ a, i), T{0});
+    }
+
     // FloorLog2
     if (v > 0) {
       EXPECT_EQ(FloorLog2(v), i);
@@ -707,9 +738,22 @@ static void test_BitOps() {
       }
     }
 
+    // BitwiseAnd
+    {
+      test_BitwiseAnd(vm1, static_cast<char>(0x99));
+      test_BitwiseAnd(v, static_cast<char>(0x99));
+      test_BitwiseAnd(char{0x66}, vm1);
+      test_BitwiseAnd(char{0x66}, v);
+      test_BitwiseAnd(v, int16_t{0x6699});
+      test_BitwiseAnd(v, uint16_t{0x9966});
+      test_BitwiseAnd(int64_t{0x1234234534564567}, v);
+      test_BitwiseAnd(uint64_t{0x9876876576545432}, v);
+    }
+
     vm1 = (vm1 << 1) | 1;
   }
 
+  // ConstexprFloorLog2
   EXPECT_EQ(ConstexprFloorLog2(T{1}), 0);
   EXPECT_EQ(ConstexprFloorLog2(T{2}), 1);
   EXPECT_EQ(ConstexprFloorLog2(T{3}), 1);
diff --git a/util/math.h b/util/math.h
index 39f30832870e9186c3dc2584585faf1416f8909d..e1948e0a313e64a4acd3c855503cb0639bf453c3 100644
--- a/util/math.h
+++ b/util/math.h
@@ -9,6 +9,9 @@
 #ifdef _MSC_VER
 #include <intrin.h>
 #endif
+#ifdef __BMI2__
+#include <immintrin.h>
+#endif
 
 #include <cstdint>
 #include <type_traits>
@@ -20,11 +23,33 @@ ASSERT_FEATURE_COMPAT_HEADER();
 
 namespace ROCKSDB_NAMESPACE {
 
+// Fast implementation of extracting the bottom n bits of an integer.
+// To ensure fast implementation, undefined if n bits is full width or more.
+template <typename T>
+inline T BottomNBits(T v, int nbits) {
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
+  assert(nbits >= 0);
+  assert(nbits < int{8 * sizeof(T)});
+#ifdef __BMI2__
+  if constexpr (sizeof(T) <= 4) {
+    return static_cast<T>(_bzhi_u32(static_cast<uint32_t>(v), nbits));
+  }
+  if constexpr (sizeof(T) <= 8) {
+    return static_cast<T>(_bzhi_u64(static_cast<uint64_t>(v), nbits));
+  }
+#endif
+  // Newer compilers compile this down to bzhi on x86, but some older
+  // ones don't, thus the need for the intrinsic above.
+  return static_cast<T>(v & ((T{1} << nbits) - 1));
+}
+
 // Fast implementation of floor(log2(v)). Undefined for 0 or negative
 // numbers (in case of signed type).
 template <typename T>
 inline int FloorLog2(T v) {
-  static_assert(std::is_integral<T>::value, "non-integral type");
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
   assert(v > 0);
 #ifdef _MSC_VER
   static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");
@@ -63,6 +88,8 @@ inline int FloorLog2(T v) {
 // Constexpr version of FloorLog2
 template <typename T>
 constexpr int ConstexprFloorLog2(T v) {
+  // NOTE: not checking is_integral so that this works with Unsigned128
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
   int rv = 0;
   while (v > T{1}) {
     ++rv;
@@ -74,7 +101,8 @@ constexpr int ConstexprFloorLog2(T v) {
 // Number of low-order zero bits before the first 1 bit. Undefined for 0.
template <typename T>
 inline int CountTrailingZeroBits(T v) {
-  static_assert(std::is_integral<T>::value, "non-integral type");
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
   assert(v != 0);
 #ifdef _MSC_VER
   static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");
@@ -115,6 +143,9 @@ namespace detail {
 
 template <typename T>
 int BitsSetToOneFallback(T v) {
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
+
   const int kBits = static_cast<int>(sizeof(T)) * 8;
   static_assert((kBits & (kBits - 1)) == 0, "must be power of two bits");
   // we static_cast these bit patterns in order to truncate them to the correct
@@ -140,7 +171,9 @@ int BitsSetToOneFallback(T v) {
 // Number of bits set to 1. Also known as "population count".
 template <typename T>
 inline int BitsSetToOne(T v) {
-  static_assert(std::is_integral<T>::value, "non-integral type");
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
+
 #ifdef _MSC_VER
   static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");
   if (sizeof(T) < sizeof(uint32_t)) {
@@ -192,7 +225,9 @@ inline int BitsSetToOne(T v) {
 
 template <typename T>
 inline int BitParity(T v) {
-  static_assert(std::is_integral<T>::value, "non-integral type");
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
+
 #ifdef _MSC_VER
   // bit parity == oddness of popcount
   return BitsSetToOne(v) & 1;
@@ -214,7 +249,8 @@ inline int BitParity(T v) {
 // encode/decode big endian.
template <typename T>
 inline T EndianSwapValue(T v) {
-  static_assert(std::is_integral<T>::value, "non-integral type");
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
 
 #ifdef _MSC_VER
   if (sizeof(T) == 2) {
@@ -244,6 +280,9 @@ inline T EndianSwapValue(T v) {
 // Reverses the order of bits in an integral value
 template <typename T>
 inline T ReverseBits(T v) {
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
+
   T r = EndianSwapValue(v);
   const T kHighestByte = T{1} << ((sizeof(T) - 1) * 8);
   const T kEveryByte = kHighestByte | (kHighestByte / 255);
@@ -277,7 +316,8 @@ inline T ReverseBits(T v) {
 // is that all square sub-matrices that include the top row are invertible.
 template <typename T>
 inline T DownwardInvolution(T v) {
-  static_assert(std::is_integral<T>::value, "non-integral type");
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
   static_assert(sizeof(T) <= 8, "only supported up to 64 bits");
 
   uint64_t r = static_cast<uint64_t>(v);
@@ -296,4 +336,16 @@ inline T DownwardInvolution(T v) {
   return static_cast<T>(r);
 }
 
+// Bitwise-And with typing that allows you to avoid writing an explicit cast
+// to the smaller type, or the type of the right parameter if same size.
+template <typename A, typename B>
+inline std::conditional_t<sizeof(A) < sizeof(B), A, B> BitwiseAnd(A a, B b) {
+  static_assert(std::is_integral_v<A>, "non-integral type");
+  static_assert(std::is_integral_v<B>, "non-integral type");
+  static_assert(!std::is_reference_v<A>, "use std::remove_reference_t");
+  static_assert(!std::is_reference_v<B>, "use std::remove_reference_t");
+  using Smaller = std::conditional_t<sizeof(A) < sizeof(B), A, B>;
+  return static_cast<Smaller>(a & b);
+}
+
 }  // namespace ROCKSDB_NAMESPACE
diff --git a/util/math128.h b/util/math128.h
index ae490051a78b905b5542c851859aa49d4bdb630e..5f96dbc66daf97b27a6669ed71a17cc534cd0d81 100644
--- a/util/math128.h
+++ b/util/math128.h
@@ -41,13 +41,13 @@ struct Unsigned128 {
     hi = upper;
   }
 
-  explicit operator uint64_t() { return lo; }
-
-  explicit operator uint32_t() { return static_cast<uint32_t>(lo); }
-
-  explicit operator uint16_t() { return static_cast<uint16_t>(lo); }
-
-  explicit operator uint8_t() { return static_cast<uint8_t>(lo); }
+  // Convert to any integer 64 bits or less.
+  template <typename T,
+            typename = std::enable_if_t<std::is_integral_v<T> &&
+                                        sizeof(T) <= sizeof(uint64_t)> >
+  explicit operator T() {
+    return static_cast<T>(lo);
+  }
 };
 
 inline Unsigned128 operator<<(const Unsigned128& lhs, unsigned shift) {
@@ -190,6 +190,16 @@ inline Unsigned128 Multiply64to128(uint64_t a, uint64_t b) {
 #endif
 }
 
+template <>
+inline Unsigned128 BottomNBits(Unsigned128 v, int nbits) {
+  if (nbits < 64) {
+    return BottomNBits(Lower64of128(v), nbits);
+  } else {
+    return (Unsigned128{BottomNBits(Upper64of128(v), nbits - 64)} << 64) |
+           Lower64of128(v);
+  }
+}
+
 template <>
 inline int FloorLog2(Unsigned128 v) {
   if (Upper64of128(v) == 0) {
@@ -236,6 +246,18 @@ inline Unsigned128 DownwardInvolution(Unsigned128 v) {
       DownwardInvolution(Upper64of128(v) ^ Lower64of128(v));
 }
 
+template <typename A>
+inline std::remove_reference_t<A> BitwiseAnd(A a, Unsigned128 b) {
+  static_assert(sizeof(A) <= sizeof(Unsigned128));
+  return static_cast<A>(a & b);
+}
+
+template <typename B>
+inline std::remove_reference_t<B> BitwiseAnd(Unsigned128 a, B b) {
+  static_assert(sizeof(B) <= sizeof(Unsigned128));
+  return static_cast<B>(a & b);
+}
+
 template <typename T>
struct IsUnsignedUpTo128
     : std::integral_constant<bool, std::is_unsigned<T>::value ||