提交 4c40ff6a 编写于 作者: N Nikita Popov

Implement rotate using funnel shift on LLVM >= 7

Implement the rotate_left and rotate_right operations using
llvm.fshl and llvm.fshr if they are available (LLVM >= 7).

Originally I wanted to expose the funnel_shift_left and
funnel_shift_right intrinsics and implement rotate_left and
rotate_right on top of them. However, emulation of funnel
shifts requires emitting a conditional to check for zero shift
amount, which is not necessary for rotates. I was uncomfortable
doing that here, as I don't want to rely on LLVM to optimize
away that conditional (and for variable rotates, I'm not sure it
can). We should revisit that question when we raise our minimum
version requirement to LLVM 7 and don't need emulation code
anymore.
上级 2ad8c7b3
...@@ -1465,6 +1465,20 @@ pub fn volatile_copy_nonoverlapping_memory<T>(dst: *mut T, src: *const T, ...@@ -1465,6 +1465,20 @@ pub fn volatile_copy_nonoverlapping_memory<T>(dst: *mut T, src: *const T,
/// y < 0 or y >= N, where N is the width of T in bits. /// y < 0 or y >= N, where N is the width of T in bits.
pub fn unchecked_shr<T>(x: T, y: T) -> T; pub fn unchecked_shr<T>(x: T, y: T) -> T;
/// Performs rotate left.
///
/// Rotates the bits of `x` to the left by `y` positions. The value and the
/// rotation amount are both passed as the same integer type `T`, and the
/// rotation amount is reduced modulo the width of `T` in bits.
///
/// The stabilized versions of this intrinsic are available on the integer
/// primitives via the `rotate_left` method. For example,
/// [`std::u32::rotate_left`](../../std/primitive.u32.html#method.rotate_left)
// Only declared when not building with the stage0 (bootstrap) compiler.
#[cfg(not(stage0))]
pub fn rotate_left<T>(x: T, y: T) -> T;
/// Performs rotate right.
///
/// Rotates the bits of `x` to the right by `y` positions. The value and the
/// rotation amount are both passed as the same integer type `T`, and the
/// rotation amount is reduced modulo the width of `T` in bits.
///
/// The stabilized versions of this intrinsic are available on the integer
/// primitives via the `rotate_right` method. For example,
/// [`std::u32::rotate_right`](../../std/primitive.u32.html#method.rotate_right)
// Only declared when not building with the stage0 (bootstrap) compiler.
#[cfg(not(stage0))]
pub fn rotate_right<T>(x: T, y: T) -> T;
/// Returns (a + b) mod 2<sup>N</sup>, where N is the width of T in bits. /// Returns (a + b) mod 2<sup>N</sup>, where N is the width of T in bits.
/// The stabilized versions of this intrinsic are available on the integer /// The stabilized versions of this intrinsic are available on the integer
/// primitives via the `wrapping_add` method. For example, /// primitives via the `wrapping_add` method. For example,
......
...@@ -2301,7 +2301,12 @@ pub const fn trailing_zeros(self) -> u32 { ...@@ -2301,7 +2301,12 @@ pub const fn trailing_zeros(self) -> u32 {
#[rustc_const_unstable(feature = "const_int_rotate")] #[rustc_const_unstable(feature = "const_int_rotate")]
#[inline] #[inline]
pub const fn rotate_left(self, n: u32) -> Self { pub const fn rotate_left(self, n: u32) -> Self {
(self << (n % $BITS)) | (self >> (($BITS - (n % $BITS)) % $BITS)) #[cfg(not(stage0))] {
unsafe { intrinsics::rotate_left(self, n as $SelfT) }
}
#[cfg(stage0)] {
(self << (n % $BITS)) | (self >> (($BITS - (n % $BITS)) % $BITS))
}
} }
} }
...@@ -2326,7 +2331,12 @@ pub const fn rotate_left(self, n: u32) -> Self { ...@@ -2326,7 +2331,12 @@ pub const fn rotate_left(self, n: u32) -> Self {
#[rustc_const_unstable(feature = "const_int_rotate")] #[rustc_const_unstable(feature = "const_int_rotate")]
#[inline] #[inline]
pub const fn rotate_right(self, n: u32) -> Self { pub const fn rotate_right(self, n: u32) -> Self {
(self >> (n % $BITS)) | (self << (($BITS - (n % $BITS)) % $BITS)) #[cfg(not(stage0))] {
unsafe { intrinsics::rotate_right(self, n as $SelfT) }
}
#[cfg(stage0)] {
(self >> (n % $BITS)) | (self << (($BITS - (n % $BITS)) % $BITS))
}
} }
} }
......
...@@ -726,6 +726,18 @@ fn declare_intrinsic(cx: &CodegenCx<'ll, '_>, key: &str) -> Option<&'ll Value> { ...@@ -726,6 +726,18 @@ fn declare_intrinsic(cx: &CodegenCx<'ll, '_>, key: &str) -> Option<&'ll Value> {
ifn!("llvm.bitreverse.i64", fn(t_i64) -> t_i64); ifn!("llvm.bitreverse.i64", fn(t_i64) -> t_i64);
ifn!("llvm.bitreverse.i128", fn(t_i128) -> t_i128); ifn!("llvm.bitreverse.i128", fn(t_i128) -> t_i128);
// Funnel-shift intrinsics, one per integer width. Each takes three operands
// of the same integer type and returns that type. The rotate intrinsics are
// lowered to these (with the first two operands equal) on LLVM >= 7.
ifn!("llvm.fshl.i8", fn(t_i8, t_i8, t_i8) -> t_i8);
ifn!("llvm.fshl.i16", fn(t_i16, t_i16, t_i16) -> t_i16);
ifn!("llvm.fshl.i32", fn(t_i32, t_i32, t_i32) -> t_i32);
ifn!("llvm.fshl.i64", fn(t_i64, t_i64, t_i64) -> t_i64);
ifn!("llvm.fshl.i128", fn(t_i128, t_i128, t_i128) -> t_i128);
ifn!("llvm.fshr.i8", fn(t_i8, t_i8, t_i8) -> t_i8);
ifn!("llvm.fshr.i16", fn(t_i16, t_i16, t_i16) -> t_i16);
ifn!("llvm.fshr.i32", fn(t_i32, t_i32, t_i32) -> t_i32);
ifn!("llvm.fshr.i64", fn(t_i64, t_i64, t_i64) -> t_i64);
ifn!("llvm.fshr.i128", fn(t_i128, t_i128, t_i128) -> t_i128);
ifn!("llvm.sadd.with.overflow.i8", fn(t_i8, t_i8) -> mk_struct!{t_i8, i1}); ifn!("llvm.sadd.with.overflow.i8", fn(t_i8, t_i8) -> mk_struct!{t_i8, i1});
ifn!("llvm.sadd.with.overflow.i16", fn(t_i16, t_i16) -> mk_struct!{t_i16, i1}); ifn!("llvm.sadd.with.overflow.i16", fn(t_i16, t_i16) -> mk_struct!{t_i16, i1});
ifn!("llvm.sadd.with.overflow.i32", fn(t_i32, t_i32) -> mk_struct!{t_i32, i1}); ifn!("llvm.sadd.with.overflow.i32", fn(t_i32, t_i32) -> mk_struct!{t_i32, i1});
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
use llvm; use llvm;
use llvm::debuginfo::{DIType, DIFile, DIScope, DIDescriptor, use llvm::debuginfo::{DIType, DIFile, DIScope, DIDescriptor,
DICompositeType, DILexicalBlock, DIFlags}; DICompositeType, DILexicalBlock, DIFlags};
use llvm_util;
use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
use rustc::hir::CodegenFnAttrFlags; use rustc::hir::CodegenFnAttrFlags;
...@@ -1169,9 +1170,8 @@ fn prepare_union_metadata( ...@@ -1169,9 +1170,8 @@ fn prepare_union_metadata(
fn use_enum_fallback(cx: &CodegenCx) -> bool { fn use_enum_fallback(cx: &CodegenCx) -> bool {
// On MSVC we have to use the fallback mode, because LLVM doesn't // On MSVC we have to use the fallback mode, because LLVM doesn't
// lower variant parts to PDB. // lower variant parts to PDB.
return cx.sess().target.target.options.is_like_msvc || unsafe { return cx.sess().target.target.options.is_like_msvc
llvm::LLVMRustVersionMajor() < 7 || llvm_util::get_major_version() < 7;
};
} }
// Describes the members of an enum value: An enum is described as a union of // Describes the members of an enum value: An enum is described as a union of
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
use attributes; use attributes;
use intrinsics::{self, Intrinsic}; use intrinsics::{self, Intrinsic};
use llvm::{self, TypeKind}; use llvm::{self, TypeKind};
use llvm_util;
use abi::{Abi, FnType, LlvmType, PassMode}; use abi::{Abi, FnType, LlvmType, PassMode};
use mir::place::PlaceRef; use mir::place::PlaceRef;
use mir::operand::{OperandRef, OperandValue}; use mir::operand::{OperandRef, OperandValue};
...@@ -284,7 +285,8 @@ pub fn codegen_intrinsic_call( ...@@ -284,7 +285,8 @@ pub fn codegen_intrinsic_call(
"ctlz" | "ctlz_nonzero" | "cttz" | "cttz_nonzero" | "ctpop" | "bswap" | "ctlz" | "ctlz_nonzero" | "cttz" | "cttz_nonzero" | "ctpop" | "bswap" |
"bitreverse" | "add_with_overflow" | "sub_with_overflow" | "bitreverse" | "add_with_overflow" | "sub_with_overflow" |
"mul_with_overflow" | "overflowing_add" | "overflowing_sub" | "overflowing_mul" | "mul_with_overflow" | "overflowing_add" | "overflowing_sub" | "overflowing_mul" |
"unchecked_div" | "unchecked_rem" | "unchecked_shl" | "unchecked_shr" | "exact_div" => { "unchecked_div" | "unchecked_rem" | "unchecked_shl" | "unchecked_shr" | "exact_div" |
"rotate_left" | "rotate_right" => {
let ty = arg_tys[0]; let ty = arg_tys[0];
match int_type_width_signed(ty, cx) { match int_type_width_signed(ty, cx) {
Some((width, signed)) => Some((width, signed)) =>
...@@ -363,6 +365,27 @@ pub fn codegen_intrinsic_call( ...@@ -363,6 +365,27 @@ pub fn codegen_intrinsic_call(
} else { } else {
bx.lshr(args[0].immediate(), args[1].immediate()) bx.lshr(args[0].immediate(), args[1].immediate())
}, },
// Lowers the rotate intrinsics: a single funnel-shift call when the linked
// LLVM is version 7 or newer, otherwise an explicit shift/or sequence.
"rotate_left" | "rotate_right" => {
let is_left = name == "rotate_left";
let val = args[0].immediate();
// Rotation amount exactly as passed in; not yet reduced modulo the bit width.
let raw_shift = args[1].immediate();
if llvm_util::get_major_version() >= 7 {
// rotate = funnel shift with first two args the same
// The intrinsic name is built from the direction and the operand width,
// e.g. `llvm.fshl.i32` for a 32-bit left rotate.
let llvm_name = &format!("llvm.fsh{}.i{}",
if is_left { 'l' } else { 'r' }, width);
let llfn = cx.get_intrinsic(llvm_name);
bx.call(llfn, &[val, val, raw_shift], None)
} else {
// rotate_left: (X << (S % BW)) | (X >> ((BW - S) % BW))
// rotate_right: (X << ((BW - S) % BW)) | (X >> (S % BW))
// `width` is shadowed here: from this point on it is the value built by
// `C_uint` for the type `Type::ix(cx, width)`, not the plain number.
let width = C_uint(Type::ix(cx, width), width);
let shift = bx.urem(raw_shift, width);
// NOTE(review): `BW - S` is emitted on the unreduced shift amount, so it
// relies on the emitted subtraction wrapping when S > BW -- confirm that
// `bx.sub` adds no nuw/nsw flags.
let inv_shift = bx.urem(bx.sub(width, raw_shift), width);
// A left rotate shifts left by `shift` and right by `inv_shift`; a right
// rotate swaps the two amounts.
let shift1 = bx.shl(val, if is_left { shift } else { inv_shift });
let shift2 = bx.lshr(val, if !is_left { shift } else { inv_shift });
bx.or(shift1, shift2)
}
},
_ => bug!(), _ => bug!(),
}, },
None => { None => {
......
...@@ -256,6 +256,10 @@ pub fn print_version() { ...@@ -256,6 +256,10 @@ pub fn print_version() {
} }
} }
/// Returns the LLVM major version number as reported by
/// `llvm::LLVMRustVersionMajor`, wrapping the `unsafe` call so that callers
/// can compare against a version without writing their own `unsafe` block.
pub fn get_major_version() -> u32 {
    // NOTE(review): presumably this only reads a version constant and has no
    // preconditions -- confirm against the definition of LLVMRustVersionMajor.
    unsafe { llvm::LLVMRustVersionMajor() }
}
pub fn print_passes() { pub fn print_passes() {
// Can be called without initializing LLVM // Can be called without initializing LLVM
unsafe { llvm::LLVMRustPrintPasses(); } unsafe { llvm::LLVMRustPrintPasses(); }
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
use libc::c_uint; use libc::c_uint;
use llvm::{self, BasicBlock}; use llvm::{self, BasicBlock};
use llvm::debuginfo::DIScope; use llvm::debuginfo::DIScope;
use llvm_util;
use rustc::ty::{self, Ty, TypeFoldable, UpvarSubsts}; use rustc::ty::{self, Ty, TypeFoldable, UpvarSubsts};
use rustc::ty::layout::{LayoutOf, TyLayout}; use rustc::ty::layout::{LayoutOf, TyLayout};
use rustc::mir::{self, Mir}; use rustc::mir::{self, Mir};
...@@ -612,7 +613,7 @@ fn arg_local_refs( ...@@ -612,7 +613,7 @@ fn arg_local_refs(
// doesn't actually strip the offset when splitting the closure // doesn't actually strip the offset when splitting the closure
// environment into its components so it ends up out of bounds. // environment into its components so it ends up out of bounds.
// (cuviper) It seems to be fine without the alloca on LLVM 6 and later. // (cuviper) It seems to be fine without the alloca on LLVM 6 and later.
let env_alloca = !env_ref && unsafe { llvm::LLVMRustVersionMajor() < 6 }; let env_alloca = !env_ref && llvm_util::get_major_version() < 6;
let env_ptr = if env_alloca { let env_ptr = if env_alloca {
let scratch = PlaceRef::alloca(bx, let scratch = PlaceRef::alloca(bx,
bx.cx.layout_of(tcx.mk_mut_ptr(arg.layout.ty)), bx.cx.layout_of(tcx.mk_mut_ptr(arg.layout.ty)),
......
...@@ -150,6 +150,24 @@ pub fn emulate_intrinsic( ...@@ -150,6 +150,24 @@ pub fn emulate_intrinsic(
} }
self.write_scalar(val, dest)?; self.write_scalar(val, dest)?;
} }
"rotate_left" | "rotate_right" => {
    // Emulates the rotate intrinsics on the raw bits of the operand, where
    // X is the value, S the shift amount and BW the bit width of the type:
    // rotate_left: (X << (S % BW)) | (X >> ((BW - S) % BW))
    // rotate_right: (X << ((BW - S) % BW)) | (X >> (S % BW))
    let layout = self.layout_of(substs.type_at(0))?;
    let val_bits = self.read_scalar(args[0])?.to_bits(layout.size)?;
    let raw_shift_bits = self.read_scalar(args[1])?.to_bits(layout.size)?;
    let width_bits = layout.size.bits() as u128;
    // Reduce the shift amount first so that every shift below is in range.
    let shift_bits = raw_shift_bits % width_bits;
    // Derive the complementary shift from the *reduced* amount. The raw
    // amount may exceed the bit width, in which case
    // `width_bits - raw_shift_bits` underflows the unsigned subtraction
    // (a panic when overflow checks are enabled). Because
    // `shift_bits < width_bits` this subtraction cannot underflow, and the
    // result is the same modulo the bit width.
    let inv_shift_bits = (width_bits - shift_bits) % width_bits;
    let result_bits = if intrinsic_name == "rotate_left" {
        (val_bits << shift_bits) | (val_bits >> inv_shift_bits)
    } else {
        (val_bits >> shift_bits) | (val_bits << inv_shift_bits)
    };
    // The left shift can set bits above the operand's width, so the value is
    // passed through `truncate` before being written back.
    let truncated_bits = self.truncate(result_bits, layout);
    let result = Scalar::from_uint(truncated_bits, layout.size);
    self.write_scalar(result, dest)?;
}
"transmute" => { "transmute" => {
self.copy_op_transmute(args[0], dest)?; self.copy_op_transmute(args[0], dest)?;
} }
......
...@@ -869,6 +869,8 @@ fn visit_terminator_kind(&mut self, ...@@ -869,6 +869,8 @@ fn visit_terminator_kind(&mut self,
| "overflowing_mul" | "overflowing_mul"
| "unchecked_shl" | "unchecked_shl"
| "unchecked_shr" | "unchecked_shr"
| "rotate_left"
| "rotate_right"
| "add_with_overflow" | "add_with_overflow"
| "sub_with_overflow" | "sub_with_overflow"
| "mul_with_overflow" | "mul_with_overflow"
......
...@@ -292,7 +292,8 @@ pub fn check_intrinsic_type<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, ...@@ -292,7 +292,8 @@ pub fn check_intrinsic_type<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
"unchecked_div" | "unchecked_rem" | "exact_div" => "unchecked_div" | "unchecked_rem" | "exact_div" =>
(1, vec![param(0), param(0)], param(0)), (1, vec![param(0), param(0)], param(0)),
"unchecked_shl" | "unchecked_shr" => "unchecked_shl" | "unchecked_shr" |
"rotate_left" | "rotate_right" =>
(1, vec![param(0), param(0)], param(0)), (1, vec![param(0), param(0)], param(0)),
"overflowing_add" | "overflowing_sub" | "overflowing_mul" => "overflowing_add" | "overflowing_sub" | "overflowing_mul" =>
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册