Commit 4c40ff6a authored by Nikita Popov

Implement rotate using funnel shift on LLVM >= 7

Implement the rotate_left and rotate_right operations using
llvm.fshl and llvm.fshr if they are available (LLVM >= 7).

Originally I wanted to expose the funnel_shift_left and
funnel_shift_right intrinsics and implement rotate_left and
rotate_right on top of them. However, emulation of funnel
shifts requires emitting a conditional to check for zero shift
amount, which is not necessary for rotates. I was uncomfortable
doing that here, as I don't want to rely on LLVM to optimize
away that conditional (and for variable rotates, I'm not sure it
can). We should revisit that question when we raise our minimum
version requirement to LLVM 7 and don't need emulation code
anymore.
Parent 2ad8c7b3
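To make the zero-shift issue from the message above concrete, here is a minimal sketch (not from this commit; `fshl_emulated` and `rotate_left_emulated` are hypothetical helpers) of why funnel-shift emulation needs a branch while the rotate fallback does not:

```rust
// Naive funnel-shift-left emulation for u32 (BW = 32). When s % 32 == 0 the
// second operand would have to be shifted right by 32, which is out of range
// for a 32-bit value (poison at the LLVM level, a panic in Rust), so the
// emulation has to branch on the shift amount.
fn fshl_emulated(a: u32, b: u32, s: u32) -> u32 {
    let s = s % 32;
    if s == 0 { a } else { (a << s) | (b >> (32 - s)) }
}

// The rotate fallback needs no branch: the extra `% 32` keeps both shift
// amounts strictly below the bit width, even when s % 32 == 0.
fn rotate_left_emulated(x: u32, s: u32) -> u32 {
    (x << (s % 32)) | (x >> ((32 - (s % 32)) % 32))
}
```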
......@@ -1465,6 +1465,20 @@ pub fn volatile_copy_nonoverlapping_memory<T>(dst: *mut T, src: *const T,
/// y < 0 or y >= N, where N is the width of T in bits.
pub fn unchecked_shr<T>(x: T, y: T) -> T;
/// Performs rotate left.
/// The stabilized versions of this intrinsic are available on the integer
/// primitives via the `rotate_left` method. For example,
/// [`std::u32::rotate_left`](../../std/primitive.u32.html#method.rotate_left)
#[cfg(not(stage0))]
pub fn rotate_left<T>(x: T, y: T) -> T;
/// Performs rotate right.
/// The stabilized versions of this intrinsic are available on the integer
/// primitives via the `rotate_right` method. For example,
/// [`std::u32::rotate_right`](../../std/primitive.u32.html#method.rotate_right)
#[cfg(not(stage0))]
pub fn rotate_right<T>(x: T, y: T) -> T;
/// Returns (a + b) mod 2<sup>N</sup>, where N is the width of T in bits.
/// The stabilized versions of this intrinsic are available on the integer
/// primitives via the `wrapping_add` method. For example,
......
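For reference, this is what the stabilized methods mentioned in the docs above do (standard-library calls only, not code from this diff):

```rust
fn main() {
    // rotate_left/rotate_right on u32; the top byte 0x01 wraps around the word.
    assert_eq!(0x10000b3u32.rotate_left(8), 0xb301);
    assert_eq!(0xb301u32.rotate_right(8), 0x10000b3);
    // A rotation by the full bit width (or any multiple of it) is a no-op.
    assert_eq!(0x80u8.rotate_left(8), 0x80);
}
```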
......@@ -2301,7 +2301,12 @@ pub const fn trailing_zeros(self) -> u32 {
#[rustc_const_unstable(feature = "const_int_rotate")]
#[inline]
pub const fn rotate_left(self, n: u32) -> Self {
-        (self << (n % $BITS)) | (self >> (($BITS - (n % $BITS)) % $BITS))
+        #[cfg(not(stage0))] {
+            unsafe { intrinsics::rotate_left(self, n as $SelfT) }
+        }
+        #[cfg(stage0)] {
+            (self << (n % $BITS)) | (self >> (($BITS - (n % $BITS)) % $BITS))
+        }
}
}
......@@ -2326,7 +2331,12 @@ pub const fn rotate_left(self, n: u32) -> Self {
#[rustc_const_unstable(feature = "const_int_rotate")]
#[inline]
pub const fn rotate_right(self, n: u32) -> Self {
-        (self >> (n % $BITS)) | (self << (($BITS - (n % $BITS)) % $BITS))
+        #[cfg(not(stage0))] {
+            unsafe { intrinsics::rotate_right(self, n as $SelfT) }
+        }
+        #[cfg(stage0)] {
+            (self >> (n % $BITS)) | (self << (($BITS - (n % $BITS)) % $BITS))
+        }
}
}
......
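A worked example of the stage0 fallback formula, assuming u8 so `$BITS` is 8; the second `% $BITS` is what keeps the right-shift amount below the width (sketch, not part of the diff):

```rust
fn main() {
    // rotate_left fallback for u8: (x << (n % 8)) | (x >> ((8 - (n % 8)) % 8))
    let x: u8 = 0b1000_0001;
    let n = 1;
    let lhs = x << (n % 8);             // 0b0000_0010 (top bit falls off)
    let rhs = x >> ((8 - (n % 8)) % 8); // 0b0000_0001 (top bit re-enters at the bottom)
    assert_eq!(lhs | rhs, 0b0000_0011);
    assert_eq!(x.rotate_left(1), 0b0000_0011);

    // n a multiple of the width: both shift amounts become 0, so the result is x itself.
    assert_eq!((x << (8 % 8)) | (x >> ((8 - (8 % 8)) % 8)), x);
}
```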
......@@ -726,6 +726,18 @@ fn declare_intrinsic(cx: &CodegenCx<'ll, '_>, key: &str) -> Option<&'ll Value> {
ifn!("llvm.bitreverse.i64", fn(t_i64) -> t_i64);
ifn!("llvm.bitreverse.i128", fn(t_i128) -> t_i128);
ifn!("llvm.fshl.i8", fn(t_i8, t_i8, t_i8) -> t_i8);
ifn!("llvm.fshl.i16", fn(t_i16, t_i16, t_i16) -> t_i16);
ifn!("llvm.fshl.i32", fn(t_i32, t_i32, t_i32) -> t_i32);
ifn!("llvm.fshl.i64", fn(t_i64, t_i64, t_i64) -> t_i64);
ifn!("llvm.fshl.i128", fn(t_i128, t_i128, t_i128) -> t_i128);
ifn!("llvm.fshr.i8", fn(t_i8, t_i8, t_i8) -> t_i8);
ifn!("llvm.fshr.i16", fn(t_i16, t_i16, t_i16) -> t_i16);
ifn!("llvm.fshr.i32", fn(t_i32, t_i32, t_i32) -> t_i32);
ifn!("llvm.fshr.i64", fn(t_i64, t_i64, t_i64) -> t_i64);
ifn!("llvm.fshr.i128", fn(t_i128, t_i128, t_i128) -> t_i128);
ifn!("llvm.sadd.with.overflow.i8", fn(t_i8, t_i8) -> mk_struct!{t_i8, i1});
ifn!("llvm.sadd.with.overflow.i16", fn(t_i16, t_i16) -> mk_struct!{t_i16, i1});
ifn!("llvm.sadd.with.overflow.i32", fn(t_i32, t_i32) -> mk_struct!{t_i32, i1});
......
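The `llvm.fshl`/`llvm.fshr` intrinsics declared here conceptually concatenate the first argument (high half) and the second (low half) into a double-width value, shift it by the third argument modulo the bit width, and return one half; a rough Rust model of the 32-bit left variant (`fshl32` is a hypothetical helper, not the LLVM implementation):

```rust
// Model of llvm.fshl.i32 semantics by widening to 64 bits: concatenate
// a (high half) and b (low half), shift left by s mod 32, keep the high half.
fn fshl32(a: u32, b: u32, s: u32) -> u32 {
    let wide = ((a as u64) << 32) | b as u64;
    ((wide << (s % 32)) >> 32) as u32
}

fn main() {
    // With both data operands equal, a funnel shift left is a rotate left,
    // which is exactly how the codegen below uses it.
    for &(x, s) in &[(0x8000_0001u32, 1), (0xdead_beef, 13), (0x1234_5678, 0)] {
        assert_eq!(fshl32(x, x, s), x.rotate_left(s));
    }
}
```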
......@@ -23,6 +23,7 @@
use llvm;
use llvm::debuginfo::{DIType, DIFile, DIScope, DIDescriptor,
DICompositeType, DILexicalBlock, DIFlags};
use llvm_util;
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
use rustc::hir::CodegenFnAttrFlags;
......@@ -1169,9 +1170,8 @@ fn prepare_union_metadata(
fn use_enum_fallback(cx: &CodegenCx) -> bool {
// On MSVC we have to use the fallback mode, because LLVM doesn't
// lower variant parts to PDB.
-    return cx.sess().target.target.options.is_like_msvc || unsafe {
-        llvm::LLVMRustVersionMajor() < 7
-    };
+    return cx.sess().target.target.options.is_like_msvc
+        || llvm_util::get_major_version() < 7;
}
// Describes the members of an enum value: An enum is described as a union of
......
......@@ -13,6 +13,7 @@
use attributes;
use intrinsics::{self, Intrinsic};
use llvm::{self, TypeKind};
use llvm_util;
use abi::{Abi, FnType, LlvmType, PassMode};
use mir::place::PlaceRef;
use mir::operand::{OperandRef, OperandValue};
......@@ -284,7 +285,8 @@ pub fn codegen_intrinsic_call(
"ctlz" | "ctlz_nonzero" | "cttz" | "cttz_nonzero" | "ctpop" | "bswap" |
"bitreverse" | "add_with_overflow" | "sub_with_overflow" |
"mul_with_overflow" | "overflowing_add" | "overflowing_sub" | "overflowing_mul" |
"unchecked_div" | "unchecked_rem" | "unchecked_shl" | "unchecked_shr" | "exact_div" => {
"unchecked_div" | "unchecked_rem" | "unchecked_shl" | "unchecked_shr" | "exact_div" |
"rotate_left" | "rotate_right" => {
let ty = arg_tys[0];
match int_type_width_signed(ty, cx) {
Some((width, signed)) =>
......@@ -363,6 +365,27 @@ pub fn codegen_intrinsic_call(
} else {
bx.lshr(args[0].immediate(), args[1].immediate())
},
"rotate_left" | "rotate_right" => {
let is_left = name == "rotate_left";
let val = args[0].immediate();
let raw_shift = args[1].immediate();
if llvm_util::get_major_version() >= 7 {
// rotate = funnel shift with first two args the same
let llvm_name = &format!("llvm.fsh{}.i{}",
if is_left { 'l' } else { 'r' }, width);
let llfn = cx.get_intrinsic(llvm_name);
bx.call(llfn, &[val, val, raw_shift], None)
} else {
// rotate_left: (X << (S % BW)) | (X >> ((BW - S) % BW))
// rotate_right: (X << ((BW - S) % BW)) | (X >> (S % BW))
let width = C_uint(Type::ix(cx, width), width);
let shift = bx.urem(raw_shift, width);
let inv_shift = bx.urem(bx.sub(width, raw_shift), width);
let shift1 = bx.shl(val, if is_left { shift } else { inv_shift });
let shift2 = bx.lshr(val, if !is_left { shift } else { inv_shift });
bx.or(shift1, shift2)
}
},
_ => bug!(),
},
None => {
......
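On the pre-LLVM-7 path the emitted `sub` wraps, but because the bit width is a power of two the following `urem` still yields the intended `(BW - S) % BW`; a Rust model of that lowering for the 32-bit case (plain integer ops standing in for the builder calls, `rotate_ir_model` is a hypothetical helper):

```rust
// `sub` wraps in IR; since 2^32 is a multiple of 32, the wrapped value is
// still congruent to 32 - raw_shift modulo 32, so the urem produces the
// right inverse shift even when raw_shift > 32.
fn rotate_ir_model(val: u32, raw_shift: u32, is_left: bool) -> u32 {
    let shift = raw_shift % 32;                          // urem
    let inv_shift = 32u32.wrapping_sub(raw_shift) % 32;  // sub + urem
    let (l, r) = if is_left { (shift, inv_shift) } else { (inv_shift, shift) };
    (val << l) | (val >> r)                              // shl | lshr | or
}

fn main() {
    assert_eq!(rotate_ir_model(0x8000_0001, 1, true), 0x8000_0001u32.rotate_left(1));
    assert_eq!(rotate_ir_model(0x8000_0001, 33, true), 0x8000_0001u32.rotate_left(33));
    assert_eq!(rotate_ir_model(0x8000_0001, 7, false), 0x8000_0001u32.rotate_right(7));
}
```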
......@@ -256,6 +256,10 @@ pub fn print_version() {
}
}
pub fn get_major_version() -> u32 {
unsafe { llvm::LLVMRustVersionMajor() }
}
pub fn print_passes() {
// Can be called without initializing LLVM
unsafe { llvm::LLVMRustPrintPasses(); }
......
......@@ -12,6 +12,7 @@
use libc::c_uint;
use llvm::{self, BasicBlock};
use llvm::debuginfo::DIScope;
use llvm_util;
use rustc::ty::{self, Ty, TypeFoldable, UpvarSubsts};
use rustc::ty::layout::{LayoutOf, TyLayout};
use rustc::mir::{self, Mir};
......@@ -612,7 +613,7 @@ fn arg_local_refs(
// doesn't actually strip the offset when splitting the closure
// environment into its components so it ends up out of bounds.
// (cuviper) It seems to be fine without the alloca on LLVM 6 and later.
-            let env_alloca = !env_ref && unsafe { llvm::LLVMRustVersionMajor() < 6 };
+            let env_alloca = !env_ref && llvm_util::get_major_version() < 6;
let env_ptr = if env_alloca {
let scratch = PlaceRef::alloca(bx,
bx.cx.layout_of(tcx.mk_mut_ptr(arg.layout.ty)),
......
......@@ -150,6 +150,24 @@ pub fn emulate_intrinsic(
}
self.write_scalar(val, dest)?;
}
"rotate_left" | "rotate_right" => {
// rotate_left: (X << (S % BW)) | (X >> ((BW - S) % BW))
// rotate_right: (X << ((BW - S) % BW)) | (X >> (S % BW))
let layout = self.layout_of(substs.type_at(0))?;
let val_bits = self.read_scalar(args[0])?.to_bits(layout.size)?;
let raw_shift_bits = self.read_scalar(args[1])?.to_bits(layout.size)?;
let width_bits = layout.size.bits() as u128;
let shift_bits = raw_shift_bits % width_bits;
let inv_shift_bits = (width_bits - shift_bits) % width_bits; // subtract the reduced shift so this cannot underflow when raw_shift_bits >= width_bits
let result_bits = if intrinsic_name == "rotate_left" {
(val_bits << shift_bits) | (val_bits >> inv_shift_bits)
} else {
(val_bits >> shift_bits) | (val_bits << inv_shift_bits)
};
let truncated_bits = self.truncate(result_bits, layout);
let result = Scalar::from_uint(truncated_bits, layout.size);
self.write_scalar(result, dest)?;
}
"transmute" => {
self.copy_op_transmute(args[0], dest)?;
}
......
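The interpreter performs the same arithmetic directly on the `u128` bit patterns; a sketch of that computation with a hypothetical `rotate_bits` helper, reducing the shift before subtracting so nothing underflows even when the shift exceeds the width:

```rust
// Same arithmetic as the interpreter path, done directly on u128 bit patterns
// (the mask trick below assumes width_bits < 128; it is only an illustration).
fn rotate_bits(val_bits: u128, raw_shift_bits: u128, width_bits: u128, left: bool) -> u128 {
    let shift = raw_shift_bits % width_bits;
    let inv_shift = (width_bits - shift) % width_bits; // reduce first: no underflow
    let bits = if left {
        (val_bits << shift) | (val_bits >> inv_shift)
    } else {
        (val_bits >> shift) | (val_bits << inv_shift)
    };
    bits & ((1u128 << width_bits) - 1) // truncate to the operand width
}

fn main() {
    assert_eq!(rotate_bits(0x81, 1, 8, true) as u8, 0x81u8.rotate_left(1));
    assert_eq!(rotate_bits(0x81, 9, 8, true) as u8, 0x81u8.rotate_left(9)); // shift > width
    assert_eq!(rotate_bits(0x81, 3, 8, false) as u8, 0x81u8.rotate_right(3));
}
```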
......@@ -869,6 +869,8 @@ fn visit_terminator_kind(&mut self,
| "overflowing_mul"
| "unchecked_shl"
| "unchecked_shr"
| "rotate_left"
| "rotate_right"
| "add_with_overflow"
| "sub_with_overflow"
| "mul_with_overflow"
......
......@@ -292,7 +292,8 @@ pub fn check_intrinsic_type<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
"unchecked_div" | "unchecked_rem" | "exact_div" =>
(1, vec![param(0), param(0)], param(0)),
"unchecked_shl" | "unchecked_shr" =>
"unchecked_shl" | "unchecked_shr" |
"rotate_left" | "rotate_right" =>
(1, vec![param(0), param(0)], param(0)),
"overflowing_add" | "overflowing_sub" | "overflowing_mul" =>
......