Add intrinsics for float arithmetic with `fast` flag enabled

`fast`, a.k.a. UnsafeAlgebra, is the flag that enables all "unsafe" (according to LLVM) float optimizations. See the LangRef for more information: http://llvm.org/docs/LangRef.html#fast-math-flags Providing these operations with less strict associativity rules (for example) is useful to numerical applications. For example, consider the summation loop: `let mut sum = 0.; for element in data { sum += *element; }` Using the default floating point semantics, this loop expresses that the floats must be added in sequence, one after another. This constraint is usually completely unintended, and it means that no autovectorization is possible.

Add intrinsics for float arithmetic with `fast` flag enabled
`fast`, a.k.a. UnsafeAlgebra, is the flag that enables all "unsafe" (according to LLVM) float optimizations. See the LangRef for more information: http://llvm.org/docs/LangRef.html#fast-math-flags Providing these operations with less strict associativity rules (for example) is useful to numerical applications. For example, consider the summation loop: `let mut sum = 0.; for element in data { sum += *element; }` Using the default floating point semantics, this loop expresses that the floats must be added in sequence, one after another. This constraint is usually completely unintended, and it means that no autovectorization is possible.
2dbac1fb · Ulrik Sverdrup · 235d7745 · 2dbac1fb · 2dbac1fb · 2dbac1fb
9 changed files
--- a/src/libcore/intrinsics.rs
+++ b/src/libcore/intrinsics.rs
@@ -539,6 +539,32 @@ pub fn volatile_copy_nonoverlapping_memory<T>(dst: *mut T, src: *const T,
    /// Returns the nearest integer to an `f64`. Rounds half-way cases away from zero.
    pub fn roundf64(x: f64) -> f64;

+    /// Float addition that allows optimizations based on algebraic rules.
+    /// May assume inputs are finite.
+    ///
+    /// `T` must be a basic float type (`f32` or `f64`); any other type is
+    /// reported as an invalid monomorphization of this intrinsic.
+    #[cfg(not(stage0))]
+    pub fn fadd_fast<T>(a: T, b: T) -> T;
+
+    /// Float subtraction that allows optimizations based on algebraic rules.
+    /// May assume inputs are finite.
+    ///
+    /// `T` must be a basic float type (`f32` or `f64`); any other type is
+    /// reported as an invalid monomorphization of this intrinsic.
+    #[cfg(not(stage0))]
+    pub fn fsub_fast<T>(a: T, b: T) -> T;
+
+    /// Float multiplication that allows optimizations based on algebraic rules.
+    /// May assume inputs are finite.
+    ///
+    /// `T` must be a basic float type (`f32` or `f64`); any other type is
+    /// reported as an invalid monomorphization of this intrinsic.
+    #[cfg(not(stage0))]
+    pub fn fmul_fast<T>(a: T, b: T) -> T;
+
+    /// Float division that allows optimizations based on algebraic rules.
+    /// May assume inputs are finite.
+    ///
+    /// `T` must be a basic float type (`f32` or `f64`); any other type is
+    /// reported as an invalid monomorphization of this intrinsic.
+    #[cfg(not(stage0))]
+    pub fn fdiv_fast<T>(a: T, b: T) -> T;
+
+    /// Float remainder that allows optimizations based on algebraic rules.
+    /// May assume inputs are finite.
+    ///
+    /// `T` must be a basic float type (`f32` or `f64`); any other type is
+    /// reported as an invalid monomorphization of this intrinsic.
+    #[cfg(not(stage0))]
+    pub fn frem_fast<T>(a: T, b: T) -> T;
+
+
    /// Returns the number of bits set in an integer type `T`
    pub fn ctpop<T>(x: T) -> T;


--- a/src/librustc_llvm/lib.rs
+++ b/src/librustc_llvm/lib.rs
@@ -1310,6 +1310,7 @@ pub fn LLVMBuildFNeg(B: BuilderRef, V: ValueRef, Name: *const c_char)
                         -> ValueRef;
    pub fn LLVMBuildNot(B: BuilderRef, V: ValueRef, Name: *const c_char)
                        -> ValueRef;
+    // Turns on the `fast` (UnsafeAlgebra) fast-math flag for `Instr`;
+    // the implementation lives in src/rustllvm/RustWrapper.cpp.
+    pub fn LLVMRustSetHasUnsafeAlgebra(Instr: ValueRef);

    /* Memory */
    pub fn LLVMBuildAlloca(B: BuilderRef, Ty: TypeRef, Name: *const c_char)

--- a/src/librustc_trans/trans/build.rs
+++ b/src/librustc_trans/trans/build.rs
@@ -221,6 +221,18 @@ pub fn FAdd(cx: Block,
    B(cx).fadd(lhs, rhs)
 }

+/// Emits a float addition through the builder's `fadd_fast`, after applying
+/// `debug_loc`. When the block is flagged unreachable nothing is emitted and
+/// `_Undef(lhs)` is returned instead.
+pub fn FAddFast(cx: Block,
+                lhs: ValueRef,
+                rhs: ValueRef,
+                debug_loc: DebugLoc)
+            -> ValueRef {
+    if !cx.unreachable.get() {
+        debug_loc.apply(cx.fcx);
+        B(cx).fadd_fast(lhs, rhs)
+    } else {
+        _Undef(lhs)
+    }
+}
+
 pub fn Sub(cx: Block,
           lhs: ValueRef,
           rhs: ValueRef,
@@ -269,6 +281,18 @@ pub fn FSub(cx: Block,
    B(cx).fsub(lhs, rhs)
 }

+/// Emits a float subtraction through the builder's `fsub_fast`, after applying
+/// `debug_loc`. When the block is flagged unreachable nothing is emitted and
+/// `_Undef(lhs)` is returned instead.
+pub fn FSubFast(cx: Block,
+                lhs: ValueRef,
+                rhs: ValueRef,
+                debug_loc: DebugLoc)
+            -> ValueRef {
+    if !cx.unreachable.get() {
+        debug_loc.apply(cx.fcx);
+        B(cx).fsub_fast(lhs, rhs)
+    } else {
+        _Undef(lhs)
+    }
+}
+
 pub fn Mul(cx: Block,
           lhs: ValueRef,
           rhs: ValueRef,
@@ -317,6 +341,18 @@ pub fn FMul(cx: Block,
    B(cx).fmul(lhs, rhs)
 }

+/// Emits a float multiplication through the builder's `fmul_fast`, after
+/// applying `debug_loc`. When the block is flagged unreachable nothing is
+/// emitted and `_Undef(lhs)` is returned instead.
+pub fn FMulFast(cx: Block,
+                lhs: ValueRef,
+                rhs: ValueRef,
+                debug_loc: DebugLoc)
+            -> ValueRef {
+    if !cx.unreachable.get() {
+        debug_loc.apply(cx.fcx);
+        B(cx).fmul_fast(lhs, rhs)
+    } else {
+        _Undef(lhs)
+    }
+}
+
 pub fn UDiv(cx: Block,
            lhs: ValueRef,
            rhs: ValueRef,
@@ -365,6 +401,18 @@ pub fn FDiv(cx: Block,
    B(cx).fdiv(lhs, rhs)
 }

+/// Emits a float division through the builder's `fdiv_fast`, after applying
+/// `debug_loc`. When the block is flagged unreachable nothing is emitted and
+/// `_Undef(lhs)` is returned instead.
+pub fn FDivFast(cx: Block,
+                lhs: ValueRef,
+                rhs: ValueRef,
+                debug_loc: DebugLoc)
+            -> ValueRef {
+    if !cx.unreachable.get() {
+        debug_loc.apply(cx.fcx);
+        B(cx).fdiv_fast(lhs, rhs)
+    } else {
+        _Undef(lhs)
+    }
+}
+
 pub fn URem(cx: Block,
            lhs: ValueRef,
            rhs: ValueRef,
@@ -401,6 +449,18 @@ pub fn FRem(cx: Block,
    B(cx).frem(lhs, rhs)
 }

+/// Emits a float remainder through the builder's `frem_fast`, after applying
+/// `debug_loc`. When the block is flagged unreachable nothing is emitted and
+/// `_Undef(lhs)` is returned instead.
+pub fn FRemFast(cx: Block,
+                lhs: ValueRef,
+                rhs: ValueRef,
+                debug_loc: DebugLoc)
+            -> ValueRef {
+    if !cx.unreachable.get() {
+        debug_loc.apply(cx.fcx);
+        B(cx).frem_fast(lhs, rhs)
+    } else {
+        _Undef(lhs)
+    }
+}
+
 pub fn Shl(cx: Block,
           lhs: ValueRef,
           rhs: ValueRef,

--- a/src/librustc_trans/trans/builder.rs
+++ b/src/librustc_trans/trans/builder.rs
@@ -226,6 +226,15 @@ pub fn fadd(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
        }
    }

+    /// Builds an `fadd` with `LLVMBuildFAdd`, then tags the result with the
+    /// `fast` flag through `LLVMRustSetHasUnsafeAlgebra`. Counted as "fadd".
+    pub fn fadd_fast(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
+        self.count_insn("fadd");
+        let sum = unsafe { llvm::LLVMBuildFAdd(self.llbuilder, lhs, rhs, noname()) };
+        unsafe { llvm::LLVMRustSetHasUnsafeAlgebra(sum); }
+        sum
+    }
+
    pub fn sub(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
        self.count_insn("sub");
        unsafe {
@@ -254,6 +263,15 @@ pub fn fsub(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
        }
    }

+    /// Builds an `fsub` with `LLVMBuildFSub`, then tags the result with the
+    /// `fast` flag through `LLVMRustSetHasUnsafeAlgebra`.
+    pub fn fsub_fast(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
+        // Count under "fsub" (not the integer "sub" bucket), matching the
+        // labels the other `*_fast` builders use for their float instructions.
+        self.count_insn("fsub");
+        unsafe {
+            let instr = llvm::LLVMBuildFSub(self.llbuilder, lhs, rhs, noname());
+            llvm::LLVMRustSetHasUnsafeAlgebra(instr);
+            instr
+        }
+    }
+
    pub fn mul(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
        self.count_insn("mul");
        unsafe {
@@ -282,6 +300,16 @@ pub fn fmul(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
        }
    }

+    /// Builds an `fmul` with `LLVMBuildFMul`, then tags the result with the
+    /// `fast` flag through `LLVMRustSetHasUnsafeAlgebra`. Counted as "fmul".
+    pub fn fmul_fast(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
+        self.count_insn("fmul");
+        let product = unsafe { llvm::LLVMBuildFMul(self.llbuilder, lhs, rhs, noname()) };
+        unsafe { llvm::LLVMRustSetHasUnsafeAlgebra(product); }
+        product
+    }
+
+
    pub fn udiv(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
        self.count_insn("udiv");
        unsafe {
@@ -310,6 +338,15 @@ pub fn fdiv(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
        }
    }

+    /// Builds an `fdiv` with `LLVMBuildFDiv`, then tags the result with the
+    /// `fast` flag through `LLVMRustSetHasUnsafeAlgebra`. Counted as "fdiv".
+    pub fn fdiv_fast(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
+        self.count_insn("fdiv");
+        let quotient = unsafe { llvm::LLVMBuildFDiv(self.llbuilder, lhs, rhs, noname()) };
+        unsafe { llvm::LLVMRustSetHasUnsafeAlgebra(quotient); }
+        quotient
+    }
+
    pub fn urem(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
        self.count_insn("urem");
        unsafe {
@@ -331,6 +368,15 @@ pub fn frem(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
        }
    }

+    /// Builds an `frem` with `LLVMBuildFRem`, then tags the result with the
+    /// `fast` flag through `LLVMRustSetHasUnsafeAlgebra`. Counted as "frem".
+    pub fn frem_fast(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
+        self.count_insn("frem");
+        let remainder = unsafe { llvm::LLVMBuildFRem(self.llbuilder, lhs, rhs, noname()) };
+        unsafe { llvm::LLVMRustSetHasUnsafeAlgebra(remainder); }
+        remainder
+    }
+
    pub fn shl(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
        self.count_insn("shl");
        unsafe {

--- a/src/librustc_trans/trans/intrinsic.rs
+++ b/src/librustc_trans/trans/intrinsic.rs
@@ -658,6 +658,29 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
            }

        },
+        // Fast-math float arithmetic: dispatch to the matching `F*Fast` helper
+        // when the first argument type is a float, otherwise report an invalid
+        // monomorphization and produce a null value of the return type.
+        (_, "fadd_fast") | (_, "fsub_fast") | (_, "fmul_fast") | (_, "fdiv_fast") |
+        (_, "frem_fast") => {
+            let sty = &arg_tys[0].sty;
+            if float_type_width(sty).is_some() {
+                let (lhs, rhs) = (llargs[0], llargs[1]);
+                match &*name {
+                    "fadd_fast" => FAddFast(bcx, lhs, rhs, call_debug_location),
+                    "fsub_fast" => FSubFast(bcx, lhs, rhs, call_debug_location),
+                    "fmul_fast" => FMulFast(bcx, lhs, rhs, call_debug_location),
+                    "fdiv_fast" => FDivFast(bcx, lhs, rhs, call_debug_location),
+                    "frem_fast" => FRemFast(bcx, lhs, rhs, call_debug_location),
+                    _ => unreachable!(),
+                }
+            } else {
+                span_invalid_monomorphization_error(
+                    tcx.sess, span,
+                    &format!("invalid monomorphization of `{}` intrinsic: \
+                              expected basic float type, found `{}`", name, sty));
+                C_null(llret_ty)
+            }
+        },


        (_, "return_address") => {
@@ -1700,3 +1723,17 @@ fn int_type_width_signed<'tcx>(sty: &ty::TypeVariants<'tcx>, ccx: &CrateContext)
        _ => None,
    }
 }
+
+// Maps a float TypeVariant to its width in bits (32 for `f32`, 64 for `f64`).
+// Yields None for every non-float type.
+fn float_type_width<'tcx>(sty: &ty::TypeVariants<'tcx>)
+        -> Option<u64> {
+    use rustc::middle::ty::TyFloat;
+    match *sty {
+        TyFloat(ast::FloatTy::F32) => Some(32),
+        TyFloat(ast::FloatTy::F64) => Some(64),
+        _ => None,
+    }
+}
--- a/src/librustc_typeck/check/intrinsic.rs
+++ b/src/librustc_typeck/check/intrinsic.rs
@@ -280,6 +280,8 @@ fn param<'a, 'tcx>(ccx: &CrateCtxt<'a, 'tcx>, n: u32) -> Ty<'tcx> {

            "overflowing_add" | "overflowing_sub" | "overflowing_mul" =>
                (1, vec![param(ccx, 0), param(ccx, 0)], param(ccx, 0)),
+            // Fast-math float intrinsics: one type parameter `T`, declared as
+            // `fn<T>(a: T, b: T) -> T`.
+            "fadd_fast" | "fsub_fast" | "fmul_fast" | "fdiv_fast" | "frem_fast" =>
+                (1, vec![param(ccx, 0), param(ccx, 0)], param(ccx, 0)),

            "return_address" => (0, vec![], tcx.mk_imm_ptr(tcx.types.u8)),


--- a/src/rustllvm/RustWrapper.cpp
+++ b/src/rustllvm/RustWrapper.cpp
@@ -164,6 +164,11 @@ extern "C" void LLVMRemoveFunctionAttrString(LLVMValueRef fn, unsigned index, co
                                          to_remove));
 }

+// Enable the fast-math flag UnsafeAlgebra on `Instr`.
+//
+// The IRBuilder constant-folds operations whose operands are all constants,
+// so the value returned by an LLVMBuildF* call may be a Constant rather than
+// an Instruction. A Constant carries no fast-math flags, so in that case this
+// is deliberately a no-op instead of an invalid cast.
+extern "C" void LLVMRustSetHasUnsafeAlgebra(LLVMValueRef Instr) {
+    if (auto I = dyn_cast<Instruction>(unwrap<Value>(Instr))) {
+        I->setHasUnsafeAlgebra(true);
+    }
+}
+
 extern "C" LLVMValueRef LLVMBuildAtomicLoad(LLVMBuilderRef B,
                                            LLVMValueRef source,
                                            const char* Name,

--- a/src/test/codegen/float_math.rs
+++ b/src/test/codegen/float_math.rs
+// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// compile-flags: -C no-prepopulate-passes
+
+#![crate_type = "lib"]
+#![feature(core_intrinsics)]
+
+use std::intrinsics::{fadd_fast, fsub_fast, fmul_fast, fdiv_fast, frem_fast};
+
+// Baseline case: the ordinary `+` operator must lower to a plain float add
+// that does not carry the fast-math flag.
+// CHECK-LABEL: @add
+#[no_mangle]
+pub fn add(x: f32, y: f32) -> f32 {
+// CHECK: fadd float
+// CHECK-NOT: fast
+    x + y
+}
+
+// `fadd_fast` must lower to an `fadd` that carries the `fast` flag.
+// CHECK-LABEL: @addition
+#[no_mangle]
+pub fn addition(x: f32, y: f32) -> f32 {
+    // CHECK: fadd fast float
+    unsafe { fadd_fast(x, y) }
+}
+
+// `fsub_fast` must lower to an `fsub` that carries the `fast` flag.
+// CHECK-LABEL: @subtraction
+#[no_mangle]
+pub fn subtraction(x: f32, y: f32) -> f32 {
+    // CHECK: fsub fast float
+    unsafe { fsub_fast(x, y) }
+}
+
+// `fmul_fast` must lower to an `fmul` that carries the `fast` flag.
+// CHECK-LABEL: @multiplication
+#[no_mangle]
+pub fn multiplication(x: f32, y: f32) -> f32 {
+    // CHECK: fmul fast float
+    unsafe { fmul_fast(x, y) }
+}
+
+// `fdiv_fast` must lower to an `fdiv` that carries the `fast` flag.
+// CHECK-LABEL: @division
+#[no_mangle]
+pub fn division(x: f32, y: f32) -> f32 {
+    // CHECK: fdiv fast float
+    unsafe { fdiv_fast(x, y) }
+}
--- a/src/test/run-pass/float_math.rs
+++ b/src/test/run-pass/float_math.rs
+// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![feature(core_intrinsics)]
+
+use std::intrinsics::{fadd_fast, fsub_fast, fmul_fast, fdiv_fast, frem_fast};
+
+fn main() {
+    // Each `*_fast` intrinsic has to produce the same result as the plain
+    // operator it corresponds to, i.e. it maps to the correct operation.
+    assert_eq!(unsafe { fadd_fast(1., 2.) }, 1. + 2.);
+    assert_eq!(unsafe { fsub_fast(1., 2.) }, 1. - 2.);
+    assert_eq!(unsafe { fmul_fast(2., 3.) }, 2. * 3.);
+    assert_eq!(unsafe { fdiv_fast(10., 5.) }, 10. / 5.);
+    assert_eq!(unsafe { frem_fast(10., 5.) }, 10. % 5.);
+}