Sync rust-lang/portable-simd@5f49d4c8435a25d804b2f375e949cb25479f5be9

4de99e18 · Jubilee Young · 68369a04 · 5f49d4c8 · 4de99e18 · 4de99e18
21 changed file
--- a/library/portable-simd/crates/core_simd/examples/spectral_norm.rs
+++ b/library/portable-simd/crates/core_simd/examples/spectral_norm.rs
+#![feature(portable_simd)]
+
+use core_simd::simd::*;
+
+fn a(i: usize, j: usize) -> f64 {
+    ((i + j) * (i + j + 1) / 2 + i + 1) as f64
+}
+
+fn mult_av(v: &[f64], out: &mut [f64]) {
+    assert!(v.len() == out.len());
+    assert!(v.len() % 2 == 0);
+
+    for (i, out) in out.iter_mut().enumerate() {
+        let mut sum = f64x2::splat(0.0);
+
+        let mut j = 0;
+        while j < v.len() {
+            let b = f64x2::from_slice(&v[j..]);
+            let a = f64x2::from_array([a(i, j), a(i, j + 1)]);
+            sum += b / a;
+            j += 2
+        }
+        *out = sum.horizontal_sum();
+    }
+}
+
+fn mult_atv(v: &[f64], out: &mut [f64]) {
+    assert!(v.len() == out.len());
+    assert!(v.len() % 2 == 0);
+
+    for (i, out) in out.iter_mut().enumerate() {
+        let mut sum = f64x2::splat(0.0);
+
+        let mut j = 0;
+        while j < v.len() {
+            let b = f64x2::from_slice(&v[j..]);
+            let a = f64x2::from_array([a(j, i), a(j + 1, i)]);
+            sum += b / a;
+            j += 2
+        }
+        *out = sum.horizontal_sum();
+    }
+}
+
+fn mult_atav(v: &[f64], out: &mut [f64], tmp: &mut [f64]) {
+    mult_av(v, tmp);
+    mult_atv(tmp, out);
+}
+
+pub fn spectral_norm(n: usize) -> f64 {
+    assert!(n % 2 == 0, "only even lengths are accepted");
+
+    let mut u = vec![1.0; n];
+    let mut v = u.clone();
+    let mut tmp = u.clone();
+
+    for _ in 0..10 {
+        mult_atav(&u, &mut v, &mut tmp);
+        mult_atav(&v, &mut u, &mut tmp);
+    }
+    (dot(&u, &v) / dot(&v, &v)).sqrt()
+}
+
+fn dot(x: &[f64], y: &[f64]) -> f64 {
+    // This is auto-vectorized:
+    x.iter().zip(y).map(|(&x, &y)| x * y).sum()
+}
+
+#[cfg(test)]
+#[test]
+fn test() {
+    assert_eq!(&format!("{:.9}", spectral_norm(100)), "1.274219991");
+}
+
+fn main() {
+    // Empty main to make cargo happy
+}
--- a/library/portable-simd/crates/core_simd/src/comparisons.rs
+++ b/library/portable-simd/crates/core_simd/src/comparisons.rs
@@ -10,6 +10,8 @@ impl<T, const LANES: usize> Simd<T, LANES>
    #[inline]
    #[must_use = "method returns a new mask and does not mutate the original value"]
    pub fn lanes_eq(self, other: Self) -> Mask<T::Mask, LANES> {
+        // Safety: `self` is a vector, and the result of the comparison
+        // is always a valid mask.
        unsafe { Mask::from_int_unchecked(intrinsics::simd_eq(self, other)) }
    }

@@ -17,6 +19,8 @@ pub fn lanes_eq(self, other: Self) -> Mask<T::Mask, LANES> {
    #[inline]
    #[must_use = "method returns a new mask and does not mutate the original value"]
    pub fn lanes_ne(self, other: Self) -> Mask<T::Mask, LANES> {
+        // Safety: `self` is a vector, and the result of the comparison
+        // is always a valid mask.
        unsafe { Mask::from_int_unchecked(intrinsics::simd_ne(self, other)) }
    }
 }
@@ -30,6 +34,8 @@ impl<T, const LANES: usize> Simd<T, LANES>
    #[inline]
    #[must_use = "method returns a new mask and does not mutate the original value"]
    pub fn lanes_lt(self, other: Self) -> Mask<T::Mask, LANES> {
+        // Safety: `self` is a vector, and the result of the comparison
+        // is always a valid mask.
        unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) }
    }

@@ -37,6 +43,8 @@ pub fn lanes_lt(self, other: Self) -> Mask<T::Mask, LANES> {
    #[inline]
    #[must_use = "method returns a new mask and does not mutate the original value"]
    pub fn lanes_gt(self, other: Self) -> Mask<T::Mask, LANES> {
+        // Safety: `self` is a vector, and the result of the comparison
+        // is always a valid mask.
        unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) }
    }

@@ -44,6 +52,8 @@ pub fn lanes_gt(self, other: Self) -> Mask<T::Mask, LANES> {
    #[inline]
    #[must_use = "method returns a new mask and does not mutate the original value"]
    pub fn lanes_le(self, other: Self) -> Mask<T::Mask, LANES> {
+        // Safety: `self` is a vector, and the result of the comparison
+        // is always a valid mask.
        unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) }
    }

@@ -51,6 +61,8 @@ pub fn lanes_le(self, other: Self) -> Mask<T::Mask, LANES> {
    #[inline]
    #[must_use = "method returns a new mask and does not mutate the original value"]
    pub fn lanes_ge(self, other: Self) -> Mask<T::Mask, LANES> {
+        // Safety: `self` is a vector, and the result of the comparison
+        // is always a valid mask.
        unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) }
    }
 }
--- a/library/portable-simd/crates/core_simd/src/intrinsics.rs
+++ b/library/portable-simd/crates/core_simd/src/intrinsics.rs
@@ -2,31 +2,55 @@
 //! crate.
 //!
 //! The LLVM assembly language is documented here: <https://llvm.org/docs/LangRef.html>
+//!
+//! A quick glossary of jargon that may appear in this module, mostly paraphrasing LLVM's LangRef:
+//! - poison: "undefined behavior as a value". specifically, it is like uninit memory (such as padding bytes). it is "safe" to create poison, BUT
+//!   poison MUST NOT be observed from safe code, as operations on poison return poison, like NaN. unlike NaN, which has defined comparisons,
+//!   poison is neither true nor false, and LLVM may also convert it to undef (at which point it is both). so, it can't be conditioned on, either.
+//! - undef: "a value that is every value". functionally like poison, insofar as Rust is concerned. poison may become this. note:
+//!   this means that division by poison or undef is like division by zero, which means it inflicts...
+//! - "UB": poison and undef cover most of what people call "UB". "UB" means this operation immediately invalidates the program:
+//!   LLVM is allowed to lower it to `ud2` or other opcodes that may cause an illegal instruction exception, and this is the "good end".
+//!   The "bad end" is that LLVM may reverse time to the moment control flow diverged on a path towards undefined behavior,
+//!   and destroy the other branch, potentially deleting safe code and violating Rust's `unsafe` contract.
+//!
+//! Note that according to LLVM, vectors are not arrays, but they are equivalent when stored to and loaded from memory.
+//!
+//! Unless stated otherwise, all intrinsics for binary operations require SIMD vectors of equal types and lengths.

 /// These intrinsics aren't linked directly from LLVM and are mostly undocumented, however they are
-/// simply lowered to the matching LLVM instructions by the compiler.  The associated instruction
-/// is documented alongside each intrinsic.
+/// mostly lowered to the matching LLVM instructions by the compiler in a fairly straightforward manner.
+/// The associated LLVM instruction or intrinsic is documented alongside each Rust intrinsic function.
 extern "platform-intrinsic" {
    /// add/fadd
    pub(crate) fn simd_add<T>(x: T, y: T) -> T;

    /// sub/fsub
-    pub(crate) fn simd_sub<T>(x: T, y: T) -> T;
+    pub(crate) fn simd_sub<T>(lhs: T, rhs: T) -> T;

    /// mul/fmul
    pub(crate) fn simd_mul<T>(x: T, y: T) -> T;

    /// udiv/sdiv/fdiv
-    pub(crate) fn simd_div<T>(x: T, y: T) -> T;
+    /// ints and uints: {s,u}div incur UB if division by zero occurs.
+    /// ints: sdiv is UB for int::MIN / -1.
+    /// floats: fdiv is never UB, but may create NaNs or infinities.
+    pub(crate) fn simd_div<T>(lhs: T, rhs: T) -> T;

    /// urem/srem/frem
-    pub(crate) fn simd_rem<T>(x: T, y: T) -> T;
+    /// ints and uints: {s,u}rem incur UB if division by zero occurs.
+    /// ints: srem is UB for int::MIN / -1.
+    /// floats: frem is equivalent to libm::fmod in the "default" floating point environment, sans errno.
+    pub(crate) fn simd_rem<T>(lhs: T, rhs: T) -> T;

    /// shl
-    pub(crate) fn simd_shl<T>(x: T, y: T) -> T;
+    /// for (u)ints. poison if rhs >= lhs::BITS
+    pub(crate) fn simd_shl<T>(lhs: T, rhs: T) -> T;

-    /// lshr/ashr
-    pub(crate) fn simd_shr<T>(x: T, y: T) -> T;
+    /// ints: ashr
+    /// uints: lshr
+    /// poison if rhs >= lhs::BITS
+    pub(crate) fn simd_shr<T>(lhs: T, rhs: T) -> T;

    /// and
    pub(crate) fn simd_and<T>(x: T, y: T) -> T;
@@ -38,6 +62,9 @@
    pub(crate) fn simd_xor<T>(x: T, y: T) -> T;

    /// fptoui/fptosi/uitofp/sitofp
+    /// casting floats to integers is truncating, so it is safe to convert values like e.g. 1.5
+    /// but the truncated value must fit in the target type or the result is poison.
+    /// use `simd_as` instead for a cast that performs a saturating conversion.
    pub(crate) fn simd_cast<T, U>(x: T) -> U;
    /// follows Rust's `T as U` semantics, including saturating float casts
    /// which amounts to the same as `simd_cast` for many cases
@@ -45,6 +72,9 @@
    pub(crate) fn simd_as<T, U>(x: T) -> U;

    /// neg/fneg
+    /// ints: ultimately becomes a call to cg_ssa's BuilderMethods::neg. cg_llvm equates this to `simd_sub(Simd::splat(0), x)`.
+    /// floats: LLVM's fneg, which changes the floating point sign bit. Some arches have instructions for it.
+    /// Rust panics for Neg::neg(int::MIN) due to overflow, but it is not UB in LLVM without `nsw`.
    pub(crate) fn simd_neg<T>(x: T) -> T;

    /// fabs
@@ -54,6 +84,7 @@
    pub(crate) fn simd_fmin<T>(x: T, y: T) -> T;
    pub(crate) fn simd_fmax<T>(x: T, y: T) -> T;

+    // these return Simd<int, N> with the same BITS size as the inputs
    pub(crate) fn simd_eq<T, U>(x: T, y: T) -> U;
    pub(crate) fn simd_ne<T, U>(x: T, y: T) -> U;
    pub(crate) fn simd_lt<T, U>(x: T, y: T) -> U;
@@ -62,19 +93,31 @@
    pub(crate) fn simd_ge<T, U>(x: T, y: T) -> U;

    // shufflevector
+    // idx: LLVM calls it a "shuffle mask vector constant", a vector of i32s
    pub(crate) fn simd_shuffle<T, U, V>(x: T, y: T, idx: U) -> V;

+    /// llvm.masked.gather
+    /// like a loop of pointer reads
+    /// val: vector of values to select if a lane is masked
+    /// ptr: vector of pointers to read from
+    /// mask: a "wide" mask of integers, selects as if simd_select(mask, read(ptr), val)
+    /// note, the LLVM intrinsic accepts a mask vector of <N x i1>
+    /// FIXME: review this if/when we fix up our mask story in general?
    pub(crate) fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T;
+    /// llvm.masked.scatter
+    /// like gather, but more spicy, as it writes instead of reads
    pub(crate) fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);

    // {s,u}add.sat
    pub(crate) fn simd_saturating_add<T>(x: T, y: T) -> T;

    // {s,u}sub.sat
-    pub(crate) fn simd_saturating_sub<T>(x: T, y: T) -> T;
+    pub(crate) fn simd_saturating_sub<T>(lhs: T, rhs: T) -> T;

    // reductions
+    // llvm.vector.reduce.{add,fadd}
    pub(crate) fn simd_reduce_add_ordered<T, U>(x: T, y: U) -> U;
+    // llvm.vector.reduce.{mul,fmul}
    pub(crate) fn simd_reduce_mul_ordered<T, U>(x: T, y: U) -> U;
    #[allow(unused)]
    pub(crate) fn simd_reduce_all<T>(x: T) -> bool;
@@ -91,7 +134,10 @@
    pub(crate) fn simd_bitmask<T, U>(x: T) -> U;

    // select
-    pub(crate) fn simd_select<M, T>(m: M, a: T, b: T) -> T;
+    // first argument is a vector of integers, -1 (all bits 1) is "true"
+    // logically equivalent to (yes & m) | (no & (m^-1),
+    // but you can use it on floats.
+    pub(crate) fn simd_select<M, T>(m: M, yes: T, no: T) -> T;
    #[allow(unused)]
-    pub(crate) fn simd_select_bitmask<M, T>(m: M, a: T, b: T) -> T;
+    pub(crate) fn simd_select_bitmask<M, T>(m: M, yes: T, no: T) -> T;
 }
--- a/library/portable-simd/crates/core_simd/src/lib.rs
+++ b/library/portable-simd/crates/core_simd/src/lib.rs
 #![cfg_attr(not(feature = "std"), no_std)]
 #![feature(
    const_fn_trait_bound,
+    convert_float_to_int,
    decl_macro,
+    intra_doc_pointers,
    platform_intrinsics,
    repr_simd,
    simd_ffi,

--- a/library/portable-simd/crates/core_simd/src/masks.rs
+++ b/library/portable-simd/crates/core_simd/src/masks.rs
@@ -12,8 +12,10 @@
 )]
 mod mask_impl;

-use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
+mod to_bitmask;
+pub use to_bitmask::ToBitMask;
+
+use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SupportedLaneCount};
 use core::cmp::Ordering;
 use core::{fmt, mem};

@@ -42,6 +44,9 @@ fn valid<const LANES: usize>(values: Simd<Self, LANES>) -> bool
 use sealed::Sealed;

 /// Marker trait for types that may be used as SIMD mask elements.
+///
+/// # Safety
+/// Type must be a signed integer.
 pub unsafe trait MaskElement: SimdElement + Sealed {}

 macro_rules! impl_element {
@@ -149,6 +154,7 @@ pub fn splat(value: bool) -> Self {
    #[inline]
    #[must_use = "method returns a new mask and does not mutate the original value"]
    pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
+        // Safety: the caller must confirm this invariant
        unsafe { Self(mask_impl::Mask::from_int_unchecked(value)) }
    }

@@ -161,6 +167,7 @@ pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
    #[must_use = "method returns a new mask and does not mutate the original value"]
    pub fn from_int(value: Simd<T, LANES>) -> Self {
        assert!(T::valid(value), "all values must be either 0 or -1",);
+        // Safety: the validity has been checked
        unsafe { Self::from_int_unchecked(value) }
    }

@@ -179,6 +186,7 @@ pub fn to_int(self) -> Simd<T, LANES> {
    #[inline]
    #[must_use = "method returns a new bool and does not mutate the original value"]
    pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
+        // Safety: the caller must confirm this invariant
        unsafe { self.0.test_unchecked(lane) }
    }

@@ -190,6 +198,7 @@ pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
    #[must_use = "method returns a new bool and does not mutate the original value"]
    pub fn test(&self, lane: usize) -> bool {
        assert!(lane < LANES, "lane index out of range");
+        // Safety: the lane index has been checked
        unsafe { self.test_unchecked(lane) }
    }

@@ -199,6 +208,7 @@ pub fn test(&self, lane: usize) -> bool {
    /// `lane` must be less than `LANES`.
    #[inline]
    pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
+        // Safety: the caller must confirm this invariant
        unsafe {
            self.0.set_unchecked(lane, value);
        }
@@ -211,27 +221,12 @@ pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
    #[inline]
    pub fn set(&mut self, lane: usize, value: bool) {
        assert!(lane < LANES, "lane index out of range");
+        // Safety: the lane index has been checked
        unsafe {
            self.set_unchecked(lane, value);
        }
    }

-    /// Convert this mask to a bitmask, with one bit set per lane.
-    #[cfg(feature = "generic_const_exprs")]
-    #[inline]
-    #[must_use = "method returns a new array and does not mutate the original value"]
-    pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
-        self.0.to_bitmask()
-    }
-
-    /// Convert a bitmask to a mask.
-    #[cfg(feature = "generic_const_exprs")]
-    #[inline]
-    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn from_bitmask(bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
-        Self(mask_impl::Mask::from_bitmask(bitmask))
-    }
-
    /// Returns true if any lane is set, or false otherwise.
    #[inline]
    #[must_use = "method returns a new bool and does not mutate the original value"]

--- a/library/portable-simd/crates/core_simd/src/masks/bitmask.rs
+++ b/library/portable-simd/crates/core_simd/src/masks/bitmask.rs
 #![allow(unused_imports)]
 use super::MaskElement;
 use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
 use core::marker::PhantomData;

 /// A mask where each lane is represented by a single bit.
@@ -115,20 +115,22 @@ pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
        unsafe { Self(intrinsics::simd_bitmask(value), PhantomData) }
    }

-    #[cfg(feature = "generic_const_exprs")]
    #[inline]
-    #[must_use = "method returns a new array and does not mutate the original value"]
-    pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
-        // Safety: these are the same type and we are laundering the generic
+    pub fn to_bitmask_integer<U>(self) -> U
+    where
+        super::Mask<T, LANES>: ToBitMask<BitMask = U>,
+    {
+        // Safety: these are the same types
        unsafe { core::mem::transmute_copy(&self.0) }
    }

-    #[cfg(feature = "generic_const_exprs")]
    #[inline]
-    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn from_bitmask(bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
-        // Safety: these are the same type and we are laundering the generic
-        Self(unsafe { core::mem::transmute_copy(&bitmask) }, PhantomData)
+    pub fn from_bitmask_integer<U>(bitmask: U) -> Self
+    where
+        super::Mask<T, LANES>: ToBitMask<BitMask = U>,
+    {
+        // Safety: these are the same types
+        unsafe { Self(core::mem::transmute_copy(&bitmask), PhantomData) }
    }

    #[inline]
@@ -137,6 +139,7 @@ pub fn convert<U>(self) -> Mask<U, LANES>
    where
        U: MaskElement,
    {
+        // Safety: bitmask layout does not depend on the element width
        unsafe { core::mem::transmute_copy(&self) }
    }


--- a/library/portable-simd/crates/core_simd/src/masks/full_masks.rs
+++ b/library/portable-simd/crates/core_simd/src/masks/full_masks.rs
@@ -2,7 +2,7 @@

 use super::MaskElement;
 use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};

 #[repr(transparent)]
 pub struct Mask<T, const LANES: usize>(Simd<T, LANES>)
@@ -66,6 +66,23 @@ fn cmp(&self, other: &Self) -> core::cmp::Ordering {
    }
 }

+// Used for bitmask bit order workaround
+pub(crate) trait ReverseBits {
+    fn reverse_bits(self) -> Self;
+}
+
+macro_rules! impl_reverse_bits {
+    { $($int:ty),* } => {
+        $(
+        impl ReverseBits for $int {
+            fn reverse_bits(self) -> Self { <$int>::reverse_bits(self) }
+        }
+        )*
+    }
+}
+
+impl_reverse_bits! { u8, u16, u32, u64 }
+
 impl<T, const LANES: usize> Mask<T, LANES>
 where
    T: MaskElement,
@@ -106,44 +123,40 @@ pub fn convert<U>(self) -> Mask<U, LANES>
    where
        U: MaskElement,
    {
+        // Safety: masks are simply integer vectors of 0 and -1, and we can cast the element type.
        unsafe { Mask(intrinsics::simd_cast(self.0)) }
    }

-    #[cfg(feature = "generic_const_exprs")]
    #[inline]
-    #[must_use = "method returns a new array and does not mutate the original value"]
-    pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
-        unsafe {
-            let mut bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN] =
-                intrinsics::simd_bitmask(self.0);
-
-            // There is a bug where LLVM appears to implement this operation with the wrong
-            // bit order.
-            // TODO fix this in a better way
-            if cfg!(target_endian = "big") {
-                for x in bitmask.as_mut() {
-                    *x = x.reverse_bits();
-                }
-            }
+    pub(crate) fn to_bitmask_integer<U: ReverseBits>(self) -> U
+    where
+        super::Mask<T, LANES>: ToBitMask<BitMask = U>,
+    {
+        // Safety: U is required to be the appropriate bitmask type
+        let bitmask: U = unsafe { intrinsics::simd_bitmask(self.0) };

+        // LLVM assumes bit order should match endianness
+        if cfg!(target_endian = "big") {
+            bitmask.reverse_bits()
+        } else {
            bitmask
        }
    }

-    #[cfg(feature = "generic_const_exprs")]
    #[inline]
-    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn from_bitmask(mut bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
-        unsafe {
-            // There is a bug where LLVM appears to implement this operation with the wrong
-            // bit order.
-            // TODO fix this in a better way
-            if cfg!(target_endian = "big") {
-                for x in bitmask.as_mut() {
-                    *x = x.reverse_bits();
-                }
-            }
+    pub(crate) fn from_bitmask_integer<U: ReverseBits>(bitmask: U) -> Self
+    where
+        super::Mask<T, LANES>: ToBitMask<BitMask = U>,
+    {
+        // LLVM assumes bit order should match endianness
+        let bitmask = if cfg!(target_endian = "big") {
+            bitmask.reverse_bits()
+        } else {
+            bitmask
+        };

+        // Safety: U is required to be the appropriate bitmask type
+        unsafe {
            Self::from_int_unchecked(intrinsics::simd_select_bitmask(
                bitmask,
                Self::splat(true).to_int(),
@@ -155,12 +168,14 @@ pub fn convert<U>(self) -> Mask<U, LANES>
    #[inline]
    #[must_use = "method returns a new bool and does not mutate the original value"]
    pub fn any(self) -> bool {
+        // Safety: use `self` as an integer vector
        unsafe { intrinsics::simd_reduce_any(self.to_int()) }
    }

    #[inline]
    #[must_use = "method returns a new vector and does not mutate the original value"]
    pub fn all(self) -> bool {
+        // Safety: use `self` as an integer vector
        unsafe { intrinsics::simd_reduce_all(self.to_int()) }
    }
 }
@@ -184,6 +199,7 @@ impl<T, const LANES: usize> core::ops::BitAnd for Mask<T, LANES>
    #[inline]
    #[must_use = "method returns a new mask and does not mutate the original value"]
    fn bitand(self, rhs: Self) -> Self {
+        // Safety: `self` is an integer vector
        unsafe { Self(intrinsics::simd_and(self.0, rhs.0)) }
    }
 }
@@ -197,6 +213,7 @@ impl<T, const LANES: usize> core::ops::BitOr for Mask<T, LANES>
    #[inline]
    #[must_use = "method returns a new mask and does not mutate the original value"]
    fn bitor(self, rhs: Self) -> Self {
+        // Safety: `self` is an integer vector
        unsafe { Self(intrinsics::simd_or(self.0, rhs.0)) }
    }
 }
@@ -210,6 +227,7 @@ impl<T, const LANES: usize> core::ops::BitXor for Mask<T, LANES>
    #[inline]
    #[must_use = "method returns a new mask and does not mutate the original value"]
    fn bitxor(self, rhs: Self) -> Self {
+        // Safety: `self` is an integer vector
        unsafe { Self(intrinsics::simd_xor(self.0, rhs.0)) }
    }
 }

--- a/library/portable-simd/crates/core_simd/src/masks/to_bitmask.rs
+++ b/library/portable-simd/crates/core_simd/src/masks/to_bitmask.rs
+use super::{mask_impl, Mask, MaskElement};
+use crate::simd::{LaneCount, SupportedLaneCount};
+
+mod sealed {
+    pub trait Sealed {}
+}
+pub use sealed::Sealed;
+
+impl<T, const LANES: usize> Sealed for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+}
+
+/// Converts masks to and from integer bitmasks.
+///
+/// Each bit of the bitmask corresponds to a mask lane, starting with the LSB.
+///
+/// # Safety
+/// This trait is `unsafe` and sealed, since the `BitMask` type must match the number of lanes in
+/// the mask.
+pub unsafe trait ToBitMask: Sealed {
+    /// The integer bitmask type.
+    type BitMask;
+
+    /// Converts a mask to a bitmask.
+    fn to_bitmask(self) -> Self::BitMask;
+
+    /// Converts a bitmask to a mask.
+    fn from_bitmask(bitmask: Self::BitMask) -> Self;
+}
+
+macro_rules! impl_integer_intrinsic {
+    { $(unsafe impl ToBitMask<BitMask=$int:ty> for Mask<_, $lanes:literal>)* } => {
+        $(
+        unsafe impl<T: MaskElement> ToBitMask for Mask<T, $lanes> {
+            type BitMask = $int;
+
+            fn to_bitmask(self) -> $int {
+                self.0.to_bitmask_integer()
+            }
+
+            fn from_bitmask(bitmask: $int) -> Self {
+                Self(mask_impl::Mask::from_bitmask_integer(bitmask))
+            }
+        }
+        )*
+    }
+}
+
+impl_integer_intrinsic! {
+    unsafe impl ToBitMask<BitMask=u8> for Mask<_, 8>
+    unsafe impl ToBitMask<BitMask=u16> for Mask<_, 16>
+    unsafe impl ToBitMask<BitMask=u32> for Mask<_, 32>
+    unsafe impl ToBitMask<BitMask=u64> for Mask<_, 64>
+}
--- a/library/portable-simd/crates/core_simd/src/math.rs
+++ b/library/portable-simd/crates/core_simd/src/math.rs
@@ -22,6 +22,7 @@
            /// ```
            #[inline]
            pub fn saturating_add(self, second: Self) -> Self {
+                // Safety: `self` is a vector
                unsafe { simd_saturating_add(self, second) }
            }

@@ -41,6 +42,7 @@ pub fn saturating_add(self, second: Self) -> Self {
            /// assert_eq!(sat, Simd::splat(0));
            #[inline]
            pub fn saturating_sub(self, second: Self) -> Self {
+                // Safety: `self` is a vector
                unsafe { simd_saturating_sub(self, second) }
            }
        })+
@@ -68,6 +70,7 @@ pub fn saturating_sub(self, second: Self) -> Self {
            /// ```
            #[inline]
            pub fn saturating_add(self, second: Self) -> Self {
+                // Safety: `self` is a vector
                unsafe { simd_saturating_add(self, second) }
            }

@@ -87,6 +90,7 @@ pub fn saturating_add(self, second: Self) -> Self {
            /// assert_eq!(sat, Simd::from_array([MIN, MIN, MIN, 0]));
            #[inline]
            pub fn saturating_sub(self, second: Self) -> Self {
+                // Safety: `self` is a vector
                unsafe { simd_saturating_sub(self, second) }
            }


--- a/library/portable-simd/crates/core_simd/src/ops.rs
+++ b/library/portable-simd/crates/core_simd/src/ops.rs
@@ -57,29 +57,40 @@ fn index_mut(&mut self, index: I) -> &mut Self::Output {
    };
 }

-// Division by zero is poison, according to LLVM.
-// So is dividing the MIN value of a signed integer by -1,
-// since that would return MAX + 1.
-// FIXME: Rust allows <SInt>::MIN / -1,
-// so we should probably figure out how to make that safe.
+/// SAFETY: This macro must only be used to impl Div or Rem and given the matching intrinsic.
+/// It guards against LLVM's UB conditions for integer div or rem using masks and selects,
+/// thus guaranteeing a Rust value returns instead.
+///
+/// |                  | LLVM | Rust
+/// | :--------------: | :--- | :----------
+/// | N {/,%} 0        | UB   | panic!()
+/// | <$int>::MIN / -1 | UB   | <$int>::MIN
+/// | <$int>::MIN % -1 | UB   | 0
+///
 macro_rules! int_divrem_guard {
    (   $lhs:ident,
        $rhs:ident,
        {   const PANIC_ZERO: &'static str = $zero:literal;
-            const PANIC_OVERFLOW: &'static str = $overflow:literal;
            $simd_call:ident
        },
        $int:ident ) => {
        if $rhs.lanes_eq(Simd::splat(0)).any() {
            panic!($zero);
-        } else if <$int>::MIN != 0
-            && ($lhs.lanes_eq(Simd::splat(<$int>::MIN))
-                // type inference can break here, so cut an SInt to size
-                & $rhs.lanes_eq(Simd::splat(-1i64 as _))).any()
-        {
-            panic!($overflow);
        } else {
-            unsafe { $crate::simd::intrinsics::$simd_call($lhs, $rhs) }
+            // Prevent otherwise-UB overflow on the MIN / -1 case.
+            let rhs = if <$int>::MIN != 0 {
+                // This should, at worst, optimize to a few branchless logical ops
+                // Ideally, this entire conditional should evaporate
+                // Fire LLVM and implement those manually if it doesn't get the hint
+                ($lhs.lanes_eq(Simd::splat(<$int>::MIN))
+                // type inference can break here, so cut an SInt to size
+                & $rhs.lanes_eq(Simd::splat(-1i64 as _)))
+                .select(Simd::splat(1), $rhs)
+            } else {
+                // Nice base case to make it easy to const-fold away the other branch.
+                $rhs
+            };
+            unsafe { $crate::simd::intrinsics::$simd_call($lhs, rhs) }
        }
    };
 }
@@ -183,7 +194,6 @@ impl BitXor::bitxor {
    impl Div::div {
        int_divrem_guard {
            const PANIC_ZERO: &'static str = "attempt to divide by zero";
-            const PANIC_OVERFLOW: &'static str = "attempt to divide with overflow";
            simd_div
        }
    }
@@ -191,7 +201,6 @@ impl Div::div {
    impl Rem::rem {
        int_divrem_guard {
            const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
-            const PANIC_OVERFLOW: &'static str = "attempt to calculate the remainder with overflow";
            simd_rem
        }
    }

--- a/library/portable-simd/crates/core_simd/src/reduction.rs
+++ b/library/portable-simd/crates/core_simd/src/reduction.rs
@@ -14,24 +14,28 @@ impl<const LANES: usize> Simd<$scalar, LANES>
            /// Horizontal wrapping add.  Returns the sum of the lanes of the vector, with wrapping addition.
            #[inline]
            pub fn horizontal_sum(self) -> $scalar {
+                // Safety: `self` is an integer vector
                unsafe { simd_reduce_add_ordered(self, 0) }
            }

            /// Horizontal wrapping multiply.  Returns the product of the lanes of the vector, with wrapping multiplication.
            #[inline]
            pub fn horizontal_product(self) -> $scalar {
+                // Safety: `self` is an integer vector
                unsafe { simd_reduce_mul_ordered(self, 1) }
            }

            /// Horizontal maximum.  Returns the maximum lane in the vector.
            #[inline]
            pub fn horizontal_max(self) -> $scalar {
+                // Safety: `self` is an integer vector
                unsafe { simd_reduce_max(self) }
            }

            /// Horizontal minimum.  Returns the minimum lane in the vector.
            #[inline]
            pub fn horizontal_min(self) -> $scalar {
+                // Safety: `self` is an integer vector
                unsafe { simd_reduce_min(self) }
            }
        }
@@ -63,6 +67,7 @@ pub fn horizontal_sum(self) -> $scalar {
                if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
                    self.as_array().iter().sum()
                } else {
+                    // Safety: `self` is a float vector
                    unsafe { simd_reduce_add_ordered(self, 0.) }
                }
            }
@@ -74,6 +79,7 @@ pub fn horizontal_product(self) -> $scalar {
                if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
                    self.as_array().iter().product()
                } else {
+                    // Safety: `self` is a float vector
                    unsafe { simd_reduce_mul_ordered(self, 1.) }
                }
            }
@@ -84,6 +90,7 @@ pub fn horizontal_product(self) -> $scalar {
            /// return either.  This function will not return `NaN` unless all lanes are `NaN`.
            #[inline]
            pub fn horizontal_max(self) -> $scalar {
+                // Safety: `self` is a float vector
                unsafe { simd_reduce_max(self) }
            }

@@ -93,6 +100,7 @@ pub fn horizontal_max(self) -> $scalar {
            /// return either.  This function will not return `NaN` unless all lanes are `NaN`.
            #[inline]
            pub fn horizontal_min(self) -> $scalar {
+                // Safety: `self` is a float vector
                unsafe { simd_reduce_min(self) }
            }
        }

--- a/library/portable-simd/crates/core_simd/src/round.rs
+++ b/library/portable-simd/crates/core_simd/src/round.rs
 use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
+use core::convert::FloatToInt;

 macro_rules! implement {
    {
-        $type:ty, $int_type:ty
+        $type:ty
    } => {
        impl<const LANES: usize> Simd<$type, LANES>
        where
@@ -18,20 +19,22 @@ impl<const LANES: usize> Simd<$type, LANES>
            /// * Not be NaN
            /// * Not be infinite
            /// * Be representable in the return type, after truncating off its fractional part
+            ///
+            /// If these requirements are infeasible or costly, consider using the safe function [cast],
+            /// which saturates on conversion.
+            ///
+            /// [cast]: Simd::cast
            #[inline]
-            pub unsafe fn to_int_unchecked(self) -> Simd<$int_type, LANES> {
+            pub unsafe fn to_int_unchecked<I>(self) -> Simd<I, LANES>
+            where
+                $type: FloatToInt<I>,
+                I: SimdElement,
+            {
                unsafe { intrinsics::simd_cast(self) }
            }
-
-            /// Creates a floating-point vector from an integer vector.  Rounds values that are
-            /// not exactly representable.
-            #[inline]
-            pub fn round_from_int(value: Simd<$int_type, LANES>) -> Self {
-                unsafe { intrinsics::simd_cast(value) }
-            }
        }
    }
 }

-implement! { f32, i32 }
-implement! { f64, i64 }
+implement! { f32 }
+implement! { f64 }
--- a/library/portable-simd/crates/core_simd/src/select.rs
+++ b/library/portable-simd/crates/core_simd/src/select.rs
@@ -11,6 +11,7 @@ impl<T, const LANES: usize> Mask<T, LANES>
    /// For each lane in the mask, choose the corresponding lane from `true_values` if
    /// that lane mask is true, and `false_values` if that lane mask is false.
    ///
+    /// # Examples
    /// ```
    /// # #![feature(portable_simd)]
    /// # #[cfg(feature = "std")] use core_simd::{Simd, Mask};
@@ -31,6 +32,8 @@ pub fn select<U>(
    where
        U: SimdElement<Mask = T>,
    {
+        // Safety: The mask has been cast to a vector of integers,
+        // and the operands to select between are vectors of the same type and length.
        unsafe { intrinsics::simd_select(self.to_int(), true_values, false_values) }
    }

@@ -39,6 +42,7 @@ pub fn select<U>(
    /// For each lane in the mask, choose the corresponding lane from `true_values` if
    /// that lane mask is true, and `false_values` if that lane mask is false.
    ///
+    /// # Examples
    /// ```
    /// # #![feature(portable_simd)]
    /// # #[cfg(feature = "std")] use core_simd::Mask;

--- a/library/portable-simd/crates/core_simd/src/swizzle.rs
+++ b/library/portable-simd/crates/core_simd/src/swizzle.rs
@@ -95,6 +95,7 @@ fn swizzle<T>(vector: Simd<T, INPUT_LANES>) -> Simd<T, OUTPUT_LANES>
        LaneCount<INPUT_LANES>: SupportedLaneCount,
        LaneCount<OUTPUT_LANES>: SupportedLaneCount,
    {
+        // Safety: `vector` is a vector, and `INDEX_IMPL` is a const array of u32.
        unsafe { intrinsics::simd_shuffle(vector, vector, Self::INDEX_IMPL) }
    }
 }
@@ -119,6 +120,7 @@ fn swizzle2<T>(
        LaneCount<INPUT_LANES>: SupportedLaneCount,
        LaneCount<OUTPUT_LANES>: SupportedLaneCount,
    {
+        // Safety: `first` and `second` are vectors, and `INDEX_IMPL` is a const array of u32.
        unsafe { intrinsics::simd_shuffle(first, second, Self::INDEX_IMPL) }
    }
 }

--- a/library/portable-simd/crates/core_simd/src/to_bytes.rs
+++ b/library/portable-simd/crates/core_simd/src/to_bytes.rs
@@ -8,12 +8,14 @@ impl<const LANES: usize> crate::simd::Simd<$ty, LANES>
            /// Return the memory representation of this integer as a byte array in native byte
            /// order.
            pub fn to_ne_bytes(self) -> crate::simd::Simd<u8, {{ $size * LANES }}> {
+                // Safety: transmuting between vectors is safe
                unsafe { core::mem::transmute_copy(&self) }
            }

            /// Create a native endian integer value from its memory representation as a byte array
            /// in native endianness.
            pub fn from_ne_bytes(bytes: crate::simd::Simd<u8, {{ $size * LANES }}>) -> Self {
+                // Safety: transmuting between vectors is safe
                unsafe { core::mem::transmute_copy(&bytes) }
            }
        }

--- a/library/portable-simd/crates/core_simd/src/vector.rs
+++ b/library/portable-simd/crates/core_simd/src/vector.rs
@@ -12,7 +12,79 @@
 use crate::simd::intrinsics;
 use crate::simd::{LaneCount, Mask, MaskElement, SupportedLaneCount};

-/// A SIMD vector of `LANES` elements of type `T`.
+/// A SIMD vector of `LANES` elements of type `T`. `Simd<T, N>` has the same shape as [`[T; N]`](array), but operates like `T`.
+///
+/// Two vectors of the same type and length will, by convention, support the operators (+, *, etc.) that `T` does.
+/// These take the lanes at each index on the left-hand side and right-hand side, perform the operation,
+/// and return the result in the same lane in a vector of equal size. For a given operator, this is equivalent to zipping
+/// the two arrays together and mapping the operator over each lane.
+///
+/// ```rust
+/// # #![feature(array_zip, portable_simd)]
+/// # use core::simd::{Simd};
+/// let a0: [i32; 4] = [-2, 0, 2, 4];
+/// let a1 = [10, 9, 8, 7];
+/// let zm_add = a0.zip(a1).map(|(lhs, rhs)| lhs + rhs);
+/// let zm_mul = a0.zip(a1).map(|(lhs, rhs)| lhs * rhs);
+///
+/// // `Simd<T, N>` implements `From<[T; N]>
+/// let (v0, v1) = (Simd::from(a0), Simd::from(a1));
+/// // Which means arrays implement `Into<Simd<T, N>>`.
+/// assert_eq!(v0 + v1, zm_add.into());
+/// assert_eq!(v0 * v1, zm_mul.into());
+/// ```
+///
+/// `Simd` with integers has the quirk that these operations are also inherently wrapping, as if `T` was [`Wrapping<T>`].
+/// Thus, `Simd` does not implement `wrapping_add`, because that is the default behavior.
+/// This means there is no warning on overflows, even in "debug" builds.
+/// For most applications where `Simd` is appropriate, it is "not a bug" to wrap,
+/// and even "debug builds" are unlikely to tolerate the loss of performance.
+/// You may want to consider using explicitly checked arithmetic if such is required.
+/// Division by zero still causes a panic, so you may want to consider using floating point numbers if that is unacceptable.
+///
+/// [`Wrapping<T>`]: core::num::Wrapping
+///
+/// # Layout
+/// `Simd<T, N>` has a layout similar to `[T; N]` (identical "shapes"), but with a greater alignment.
+/// `[T; N]` is aligned to `T`, but `Simd<T, N>` will have an alignment based on both `T` and `N`.
+/// It is thus sound to [`transmute`] `Simd<T, N>` to `[T; N]`, and will typically optimize to zero cost,
+/// but the reverse transmutation is more likely to require a copy the compiler cannot simply elide.
+///
+/// # ABI "Features"
+/// Due to Rust's safety guarantees, `Simd<T, N>` is currently passed to and from functions via memory, not SIMD registers,
+/// except as an optimization. `#[inline]` hints are recommended on functions that accept `Simd<T, N>` or return it.
+/// The need for this may be corrected in the future.
+///
+/// # Safe SIMD with Unsafe Rust
+///
+/// Operations with `Simd` are typically safe, but there are many reasons to want to combine SIMD with `unsafe` code.
+/// Care must be taken to respect differences between `Simd` and other types it may be transformed into or derived from.
+/// In particular, the layout of `Simd<T, N>` may be similar to `[T; N]`, and may allow some transmutations,
+/// but references to `[T; N]` are not interchangeable with those to `Simd<T, N>`.
+/// Thus, when using `unsafe` Rust to read and write `Simd<T, N>` through [raw pointers], it is a good idea to first try with
+/// [`read_unaligned`] and [`write_unaligned`]. This is because:
+/// - [`read`] and [`write`] require full alignment (in this case, `Simd<T, N>`'s alignment)
+/// - the likely source for reading or destination for writing `Simd<T, N>` is [`[T]`](slice) and similar types, aligned to `T`
+/// - combining these actions would violate the `unsafe` contract and explode the program into a puff of **undefined behavior**
+/// - the compiler can implicitly adjust layouts to make unaligned reads or writes fully aligned if it sees the optimization
+/// - most contemporary processors suffer no performance penalty for "unaligned" reads and writes that are aligned at runtime
+///
+/// By imposing less obligations, unaligned functions are less likely to make the program unsound,
+/// and may be just as fast as stricter alternatives.
+/// When trying to guarantee alignment, [`[T]::as_simd`][as_simd] is an option for converting `[T]` to `[Simd<T, N>]`,
+/// and allows soundly operating on an aligned SIMD body, but it may cost more time when handling the scalar head and tail.
+/// If these are not sufficient, then it is most ideal to design data structures to be already aligned
+/// to the `Simd<T, N>` you wish to use before using `unsafe` Rust to read or write.
+/// More conventional ways to compensate for these facts, like materializing `Simd` to or from an array first,
+/// are handled by safe methods like [`Simd::from_array`] and [`Simd::from_slice`].
+///
+/// [`transmute`]: core::mem::transmute
+/// [raw pointers]: pointer
+/// [`read_unaligned`]: pointer::read_unaligned
+/// [`write_unaligned`]: pointer::write_unaligned
+/// [`read`]: pointer::read
+/// [`write`]: pointer::write
+/// [as_simd]: slice::as_simd
 #[repr(simd)]
 pub struct Simd<T, const LANES: usize>([T; LANES])
 where
@@ -102,6 +174,7 @@ pub const fn from_slice(slice: &[T]) -> Self {
    #[inline]
    #[cfg(not(bootstrap))]
    pub fn cast<U: SimdElement>(self) -> Simd<U, LANES> {
+        // Safety: The input argument is a vector of a known SIMD type.
        unsafe { intrinsics::simd_as(self) }
    }

@@ -175,7 +248,7 @@ pub fn gather_select(
        or: Self,
    ) -> Self {
        let enable: Mask<isize, LANES> = enable & idxs.lanes_lt(Simd::splat(slice.len()));
-        // SAFETY: We have masked-off out-of-bounds lanes.
+        // Safety: We have masked-off out-of-bounds lanes.
        unsafe { Self::gather_select_unchecked(slice, enable, idxs, or) }
    }

@@ -216,7 +289,7 @@ pub unsafe fn gather_select_unchecked(
        let base_ptr = crate::simd::ptr::SimdConstPtr::splat(slice.as_ptr());
        // Ferris forgive me, I have done pointer arithmetic here.
        let ptrs = base_ptr.wrapping_add(idxs);
-        // SAFETY: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah
+        // Safety: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah
        unsafe { intrinsics::simd_gather(or, ptrs, enable.to_int()) }
    }

@@ -268,7 +341,7 @@ pub fn scatter_select(
        idxs: Simd<usize, LANES>,
    ) {
        let enable: Mask<isize, LANES> = enable & idxs.lanes_lt(Simd::splat(slice.len()));
-        // SAFETY: We have masked-off out-of-bounds lanes.
+        // Safety: We have masked-off out-of-bounds lanes.
        unsafe { self.scatter_select_unchecked(slice, enable, idxs) }
    }

@@ -307,7 +380,7 @@ pub unsafe fn scatter_select_unchecked(
        enable: Mask<isize, LANES>,
        idxs: Simd<usize, LANES>,
    ) {
-        // SAFETY: This block works with *mut T derived from &mut 'a [T],
+        // Safety: This block works with *mut T derived from &mut 'a [T],
        // which means it is delicate in Rust's borrowing model, circa 2021:
        // &mut 'a [T] asserts uniqueness, so deriving &'a [T] invalidates live *mut Ts!
        // Even though this block is largely safe methods, it must be exactly this way
@@ -487,7 +560,9 @@ pub trait Sealed {}
 use sealed::Sealed;

 /// Marker trait for types that may be used as SIMD vector elements.
-/// SAFETY: This trait, when implemented, asserts the compiler can monomorphize
+///
+/// # Safety
+/// This trait, when implemented, asserts the compiler can monomorphize
 /// `#[repr(simd)]` structs with the marked type as an element.
 /// Strictly, it is valid to impl if the vector will not be miscompiled.
 /// Practically, it is user-unfriendly to impl it if the vector won't compile,

--- a/library/portable-simd/crates/core_simd/src/vector/ptr.rs
+++ b/library/portable-simd/crates/core_simd/src/vector/ptr.rs
@@ -21,6 +21,8 @@ pub fn splat(ptr: *const T) -> Self {
    #[inline]
    #[must_use]
    pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self {
+        // Safety: converting pointers to usize and vice-versa is safe
+        // (even if using that pointer is not)
        unsafe {
            let x: Simd<usize, LANES> = mem::transmute_copy(&self);
            mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::<T>())) })
@@ -47,6 +49,8 @@ pub fn splat(ptr: *mut T) -> Self {
    #[inline]
    #[must_use]
    pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self {
+        // Safety: converting pointers to usize and vice-versa is safe
+        // (even if using that pointer is not)
        unsafe {
            let x: Simd<usize, LANES> = mem::transmute_copy(&self);
            mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::<T>())) })

--- a/library/portable-simd/crates/core_simd/src/vendor.rs
+++ b/library/portable-simd/crates/core_simd/src/vendor.rs
@@ -9,6 +9,8 @@
        impl core::convert::From<$from> for $to {
            #[inline]
            fn from(value: $from) -> $to {
+                // Safety: transmuting between vectors is safe, but the caller of this macro
+                // checks the invariants
                unsafe { core::mem::transmute(value) }
            }
        }

--- a/library/portable-simd/crates/core_simd/tests/masks.rs
+++ b/library/portable-simd/crates/core_simd/tests/masks.rs
@@ -68,16 +68,16 @@ fn roundtrip_int_conversion() {
                assert_eq!(core_simd::Mask::<$type, 8>::from_int(int), mask);
            }

-            #[cfg(feature = "generic_const_exprs")]
            #[test]
            fn roundtrip_bitmask_conversion() {
+                use core_simd::ToBitMask;
                let values = [
                    true, false, false, true, false, false, true, false,
                    true, true, false, false, false, false, false, true,
                ];
                let mask = core_simd::Mask::<$type, 16>::from_array(values);
                let bitmask = mask.to_bitmask();
-                assert_eq!(bitmask, [0b01001001, 0b10000011]);
+                assert_eq!(bitmask, 0b1000001101001001);
                assert_eq!(core_simd::Mask::<$type, 16>::from_bitmask(bitmask), mask);
            }
        }

--- a/library/portable-simd/crates/core_simd/tests/ops_macros.rs
+++ b/library/portable-simd/crates/core_simd/tests/ops_macros.rs
@@ -210,15 +210,21 @@ fn signum<const LANES: usize>() {
                    )
                }

-            }
+                fn div_min_may_overflow<const LANES: usize>() {
+                    let a = Vector::<LANES>::splat(Scalar::MIN);
+                    let b = Vector::<LANES>::splat(-1);
+                    assert_eq!(a / b, a);
+                }

-            test_helpers::test_lanes_panic! {
-                fn div_min_overflow_panics<const LANES: usize>() {
+                fn rem_min_may_overflow<const LANES: usize>() {
                    let a = Vector::<LANES>::splat(Scalar::MIN);
                    let b = Vector::<LANES>::splat(-1);
-                    let _ = a / b;
+                    assert_eq!(a % b, Vector::<LANES>::splat(0));
                }

+            }
+
+            test_helpers::test_lanes_panic! {
                fn div_by_all_zeros_panics<const LANES: usize>() {
                    let a = Vector::<LANES>::splat(42);
                    let b = Vector::<LANES>::splat(0);
@@ -232,12 +238,6 @@ fn div_by_one_zero_panics<const LANES: usize>() {
                    let _ = a / b;
                }

-                fn rem_min_overflow_panic<const LANES: usize>() {
-                    let a = Vector::<LANES>::splat(Scalar::MIN);
-                    let b = Vector::<LANES>::splat(-1);
-                    let _ = a % b;
-                }
-
                fn rem_zero_panic<const LANES: usize>() {
                    let a = Vector::<LANES>::splat(42);
                    let b = Vector::<LANES>::splat(0);

--- a/library/portable-simd/crates/core_simd/tests/round.rs
+++ b/library/portable-simd/crates/core_simd/tests/round.rs
@@ -53,14 +53,6 @@ fn fract<const LANES: usize>() {
            }

            test_helpers::test_lanes! {
-                fn from_int<const LANES: usize>() {
-                    test_helpers::test_unary_elementwise(
-                        &Vector::<LANES>::round_from_int,
-                        &|x| x as Scalar,
-                        &|_| true,
-                    )
-                }
-
                fn to_int_unchecked<const LANES: usize>() {
                    // The maximum integer that can be represented by the equivalently sized float has
                    // all of the mantissa digits set to 1, pushed up to the MSB.
@@ -72,11 +64,11 @@ fn to_int_unchecked<const LANES: usize>() {
                    runner.run(
                        &test_helpers::array::UniformArrayStrategy::new(-MAX_REPRESENTABLE_VALUE..MAX_REPRESENTABLE_VALUE),
                        |x| {
-                            let result_1 = unsafe { Vector::from_array(x).to_int_unchecked().to_array() };
+                            let result_1 = unsafe { Vector::from_array(x).to_int_unchecked::<IntScalar>().to_array() };
                            let result_2 = {
-                                let mut result = [0; LANES];
+                                let mut result: [IntScalar; LANES] = [0; LANES];
                                for (i, o) in x.iter().zip(result.iter_mut()) {
-                                    *o = unsafe { i.to_int_unchecked() };
+                                    *o = unsafe { i.to_int_unchecked::<IntScalar>() };
                                }
                                result
                            };