提交 872ba2cc 编写于 作者: B bors

auto merge of #19294 : huonw/rust/transmute-inplace, r=nikomatsakis

This detects (a subset of) the cases when `transmute::<T, U>(x)` can be
lowered to a direct `bitcast T x to U` in LLVM. This assists with
efficiently handling a SIMD vector as multiple different types,
e.g. swapping bytes/words/double words around inside some larger vector
type.

C compilers like GCC and Clang handle integer vector types as `__m128i`
for all widths, and implicitly insert bitcasts as required. This patch
allows Rust to express this, even if it takes a bit of `unsafe`, whereas
previously it was impossible to do at all without inline assembly.

Example:

    pub fn reverse_u32s(u: u64x2) -> u64x2 {
        unsafe {
            let tmp = mem::transmute::<_, u32x4>(u);
            let swapped = u32x4(tmp.3, tmp.2, tmp.1, tmp.0);
            mem::transmute::<_, u64x2>(swapped)
        }
    }

Compiling with `--opt-level=3` gives:

Before

    define <2 x i64> @_ZN12reverse_u32s20hbdb206aba18a03d8tbaE(<2 x i64>) unnamed_addr #0 {
    entry-block:
      %1 = bitcast <2 x i64> %0 to i128
      %u.0.extract.trunc = trunc i128 %1 to i32
      %u.4.extract.shift = lshr i128 %1, 32
      %u.4.extract.trunc = trunc i128 %u.4.extract.shift to i32
      %u.8.extract.shift = lshr i128 %1, 64
      %u.8.extract.trunc = trunc i128 %u.8.extract.shift to i32
      %u.12.extract.shift = lshr i128 %1, 96
      %u.12.extract.trunc = trunc i128 %u.12.extract.shift to i32
      %2 = insertelement <4 x i32> undef, i32 %u.12.extract.trunc, i64 0
      %3 = insertelement <4 x i32> %2, i32 %u.8.extract.trunc, i64 1
      %4 = insertelement <4 x i32> %3, i32 %u.4.extract.trunc, i64 2
      %5 = insertelement <4 x i32> %4, i32 %u.0.extract.trunc, i64 3
      %6 = bitcast <4 x i32> %5 to <2 x i64>
      ret <2 x i64> %6
    }

    _ZN12reverse_u32s20hbdb206aba18a03d8tbaE:
    	.cfi_startproc
    	movd	%xmm0, %rax
    	punpckhqdq	%xmm0, %xmm0
    	movd	%xmm0, %rcx
    	movq	%rcx, %rdx
    	shrq	$32, %rdx
    	movq	%rax, %rsi
    	shrq	$32, %rsi
    	movd	%eax, %xmm0
    	movd	%ecx, %xmm1
    	punpckldq	%xmm0, %xmm1
    	movd	%esi, %xmm2
    	movd	%edx, %xmm0
    	punpckldq	%xmm2, %xmm0
    	punpckldq	%xmm1, %xmm0
    	retq

After

    define <2 x i64> @_ZN12reverse_u32s20hbdb206aba18a03d8tbaE(<2 x i64>) unnamed_addr #0 {
    entry-block:
      %1 = bitcast <2 x i64> %0 to <4 x i32>
      %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
      %3 = bitcast <4 x i32> %2 to <2 x i64>
      ret <2 x i64> %3
    }

    _ZN12reverse_u32s20hbdb206aba18a03d8tbaE:
    	.cfi_startproc
    	pshufd	$27, %xmm0, %xmm0
    	retq
......@@ -11,7 +11,7 @@
#![allow(non_upper_case_globals)]
use llvm;
use llvm::{SequentiallyConsistent, Acquire, Release, AtomicXchg, ValueRef};
use llvm::{SequentiallyConsistent, Acquire, Release, AtomicXchg, ValueRef, TypeKind};
use middle::subst;
use middle::subst::FnSpace;
use trans::base::*;
......@@ -174,12 +174,65 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
// This should be caught by the intrinsicck pass
assert_eq!(in_type_size, out_type_size);
// We need to cast the dest so the types work out
let dest = match dest {
expr::SaveIn(d) => expr::SaveIn(PointerCast(bcx, d, llintype.ptr_to())),
expr::Ignore => expr::Ignore
// Classifies an LLVM type kind: yields `true` for the floating-point
// kinds, integers, vectors and x86 MMX values listed in the match, and
// `false` for every other kind (the catch-all arm), which includes
// pointers and aggregates as the closure's name indicates.
let nonpointer_nonaggregate = |llkind: TypeKind| -> bool {
// Glob-import the variants so the arms below can name them unqualified.
use llvm::TypeKind::*;
match llkind {
Half | Float | Double | X86_FP80 | FP128 |
PPC_FP128 | Integer | Vector | X86_MMX => true,
_ => false
}
};
// An approximation to which types can be directly cast via
// LLVM's bitcast. `nonpointer_nonaggregate` deliberately rejects
// pointers, so pointer -> pointer casts are accepted by the
// separate check below; importantly, SIMD types are covered.
let in_kind = llintype.kind();
let ret_kind = llret_ty.kind();
let bitcast_compatible =
(nonpointer_nonaggregate(in_kind) && nonpointer_nonaggregate(ret_kind)) || {
in_kind == TypeKind::Pointer && ret_kind == TypeKind::Pointer
};
let dest = if bitcast_compatible {
// If we're here, both types are scalar-like (a primitive, a
// SIMD type or a pointer), so the value can be handled as a
// by-value ValueRef and bitcast directly to the target type.
// Special-casing this makes conversions like `u32x4` ->
// `u64x2` much easier for LLVM to optimise, and hence more
// efficient. (C compilers perform these conversions
// implicitly and efficiently via the `__m128i` type, so this
// means Rust doesn't lose out there.)
let expr = &*arg_exprs[0];
let datum = unpack_datum!(bcx, expr::trans(bcx, expr));
let datum = unpack_datum!(bcx, datum.to_rvalue_datum(bcx, "transmute_temp"));
let val = if datum.kind.is_by_ref() {
load_ty(bcx, datum.val, datum.ty)
} else {
datum.val
};
let cast_val = BitCast(bcx, val, llret_ty);
match dest {
expr::SaveIn(d) => {
// This store often ends up in a sequence like `Store(val,
// d); val2 = Load(d)`, which is easily optimised away.
Store(bcx, cast_val, d);
}
expr::Ignore => {}
}
dest
} else {
// The types are too complicated to do with a by-value
// bitcast, so pointer cast instead. We need to cast the
// dest so the types work out.
let dest = match dest {
expr::SaveIn(d) => expr::SaveIn(PointerCast(bcx, d, llintype.ptr_to())),
expr::Ignore => expr::Ignore
};
bcx = expr::trans_into(bcx, &*arg_exprs[0], dest);
dest
};
bcx = expr::trans_into(bcx, &*arg_exprs[0], dest);
fcx.pop_custom_cleanup_scope(cleanup_scope);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册