Commit 01cc5b3e, authored by: gnzlbg

add intrinsics for portable packed simd vector reductions

Parent commit: e5acb0c8
......@@ -1201,6 +1201,46 @@ pub fn LLVMBuildInsertValue(B: BuilderRef,
Name: *const c_char)
-> ValueRef;
// FFI declarations for LLVM vector-reduction builders (implemented in
// RustWrapper.cpp as thin wrappers over IRBuilder's Create*Reduce helpers).
//
// The floating-point add/mul reductions take an explicit accumulator `Acc`
// that seeds the reduction; the integer reductions need no accumulator.
pub fn LLVMRustBuildVectorReduceFAdd(B: BuilderRef,
                                     Acc: ValueRef,
                                     Src: ValueRef)
                                     -> ValueRef;
pub fn LLVMRustBuildVectorReduceFMul(B: BuilderRef,
                                     Acc: ValueRef,
                                     Src: ValueRef)
                                     -> ValueRef;
pub fn LLVMRustBuildVectorReduceAdd(B: BuilderRef,
                                    Src: ValueRef)
                                    -> ValueRef;
pub fn LLVMRustBuildVectorReduceMul(B: BuilderRef,
                                    Src: ValueRef)
                                    -> ValueRef;
pub fn LLVMRustBuildVectorReduceAnd(B: BuilderRef,
                                    Src: ValueRef)
                                    -> ValueRef;
pub fn LLVMRustBuildVectorReduceOr(B: BuilderRef,
                                   Src: ValueRef)
                                   -> ValueRef;
pub fn LLVMRustBuildVectorReduceXor(B: BuilderRef,
                                    Src: ValueRef)
                                    -> ValueRef;
// `IsSigned` selects the signed vs. unsigned integer min/max reduction.
pub fn LLVMRustBuildVectorReduceMin(B: BuilderRef,
                                    Src: ValueRef,
                                    IsSigned: bool)
                                    -> ValueRef;
pub fn LLVMRustBuildVectorReduceMax(B: BuilderRef,
                                    Src: ValueRef,
                                    IsSigned: bool)
                                    -> ValueRef;
// Renamed `IsNaN` -> `NoNaN` to match the C++ definition: the flag asserts
// the *absence* of NaNs in the input, so the old name stated the opposite of
// what the parameter means. Calls are positional, so no caller changes.
pub fn LLVMRustBuildVectorReduceFMin(B: BuilderRef,
                                     Src: ValueRef,
                                     NoNaN: bool)
                                     -> ValueRef;
pub fn LLVMRustBuildVectorReduceFMax(B: BuilderRef,
                                     Src: ValueRef,
                                     NoNaN: bool)
                                     -> ValueRef;
pub fn LLVMBuildIsNull(B: BuilderRef, Val: ValueRef, Name: *const c_char) -> ValueRef;
pub fn LLVMBuildIsNotNull(B: BuilderRef, Val: ValueRef, Name: *const c_char) -> ValueRef;
pub fn LLVMBuildPtrDiff(B: BuilderRef,
......
......@@ -955,6 +955,81 @@ pub fn vector_splat(&self, num_elts: usize, elt: ValueRef) -> ValueRef {
}
}
// Ordered float add reduction seeded by `acc`; marking the resulting
// instruction with unsafe algebra (fast-math) allows LLVM to reassociate,
// effectively making the reduction unordered.
pub fn vector_reduce_fadd_fast(&self, acc: ValueRef, src: ValueRef) -> ValueRef {
self.count_insn("vector.reduce.fadd_fast");
unsafe {
let instr = llvm::LLVMRustBuildVectorReduceFAdd(self.llbuilder, acc, src);
llvm::LLVMRustSetHasUnsafeAlgebra(instr);
instr
}
}
// Float multiply reduction seeded by `acc`, with fast-math enabled (see
// `vector_reduce_fadd_fast`).
pub fn vector_reduce_fmul_fast(&self, acc: ValueRef, src: ValueRef) -> ValueRef {
self.count_insn("vector.reduce.fmul_fast");
unsafe {
let instr = llvm::LLVMRustBuildVectorReduceFMul(self.llbuilder, acc, src);
llvm::LLVMRustSetHasUnsafeAlgebra(instr);
instr
}
}
// Integer add reduction over the lanes of `src`.
pub fn vector_reduce_add(&self, src: ValueRef) -> ValueRef {
self.count_insn("vector.reduce.add");
unsafe {
llvm::LLVMRustBuildVectorReduceAdd(self.llbuilder, src)
}
}
// Integer multiply reduction over the lanes of `src`.
pub fn vector_reduce_mul(&self, src: ValueRef) -> ValueRef {
self.count_insn("vector.reduce.mul");
unsafe {
llvm::LLVMRustBuildVectorReduceMul(self.llbuilder, src)
}
}
// Bitwise AND reduction over the lanes of `src`.
pub fn vector_reduce_and(&self, src: ValueRef) -> ValueRef {
self.count_insn("vector.reduce.and");
unsafe {
llvm::LLVMRustBuildVectorReduceAnd(self.llbuilder, src)
}
}
// Bitwise OR reduction over the lanes of `src`.
pub fn vector_reduce_or(&self, src: ValueRef) -> ValueRef {
self.count_insn("vector.reduce.or");
unsafe {
llvm::LLVMRustBuildVectorReduceOr(self.llbuilder, src)
}
}
// Bitwise XOR reduction over the lanes of `src`.
pub fn vector_reduce_xor(&self, src: ValueRef) -> ValueRef {
self.count_insn("vector.reduce.xor");
unsafe {
llvm::LLVMRustBuildVectorReduceXor(self.llbuilder, src)
}
}
// Float minimum reduction with fast-math enabled.
// NOTE(review): the `false` passed here is the C++ side's `NoNaN` flag,
// i.e. NaNs are assumed possible even though the instruction is marked
// with unsafe algebra — confirm this combination is intended.
pub fn vector_reduce_fmin_fast(&self, src: ValueRef) -> ValueRef {
self.count_insn("vector.reduce.fmin_fast");
unsafe {
let instr = llvm::LLVMRustBuildVectorReduceFMin(self.llbuilder, src, false);
llvm::LLVMRustSetHasUnsafeAlgebra(instr);
instr
}
}
// Float maximum reduction with fast-math enabled (same NoNaN caveat as
// `vector_reduce_fmin_fast`).
pub fn vector_reduce_fmax_fast(&self, src: ValueRef) -> ValueRef {
self.count_insn("vector.reduce.fmax_fast");
unsafe {
let instr = llvm::LLVMRustBuildVectorReduceFMax(self.llbuilder, src, false);
llvm::LLVMRustSetHasUnsafeAlgebra(instr);
instr
}
}
// Integer minimum reduction; `is_signed` selects signed vs. unsigned compare.
pub fn vector_reduce_min(&self, src: ValueRef, is_signed: bool) -> ValueRef {
self.count_insn("vector.reduce.min");
unsafe {
llvm::LLVMRustBuildVectorReduceMin(self.llbuilder, src, is_signed)
}
}
// Integer maximum reduction; `is_signed` selects signed vs. unsigned compare.
pub fn vector_reduce_max(&self, src: ValueRef, is_signed: bool) -> ValueRef {
self.count_insn("vector.reduce.max");
unsafe {
llvm::LLVMRustBuildVectorReduceMax(self.llbuilder, src, is_signed)
}
}
pub fn extract_value(&self, agg_val: ValueRef, idx: u64) -> ValueRef {
self.count_insn("extractvalue");
assert_eq!(idx as c_uint as u64, idx);
......
......@@ -1018,14 +1018,22 @@ fn generic_simd_intrinsic<'a, 'tcx>(
name, $($fmt)*));
}
}
macro_rules! require {
($cond: expr, $($fmt: tt)*) => {
if !$cond {
// Emits the formatted error diagnostic for the current intrinsic and bails
// out of the enclosing codegen function with `Err(())`.
macro_rules! return_error {
($($fmt: tt)*) => {
{
emit_error!($($fmt)*);
return Err(());
}
}
}
// Checks a condition; on failure, reports the formatted error and returns
// early via `return_error!`.
macro_rules! require {
($cond: expr, $($fmt: tt)*) => {
if !$cond {
return_error!($($fmt)*);
}
};
}
macro_rules! require_simd {
($ty: expr, $position: expr) => {
require!($ty.is_simd(), "expected SIMD {} type, found non-SIMD `{}`", $position, $ty)
......@@ -1142,6 +1150,211 @@ fn generic_simd_intrinsic<'a, 'tcx>(
return Ok(bx.extract_element(args[0].immediate(), args[1].immediate()))
}
if name == "simd_reduce_add" {
    // The reduction collapses the vector to a single lane, so the return
    // type must be the input vector's element type.
    require!(ret_ty == in_elem,
             "expected return type `{}` (element of input `{}`), found `{}`",
             in_elem, in_ty, ret_ty);
    return match in_elem.sty {
        // Signed and unsigned integers lower to the same add reduction.
        ty::TyInt(_) | ty::TyUint(_) => Ok(bx.vector_reduce_add(args[0].immediate())),
        ty::TyFloat(f) => {
            // An `undef` accumulator makes the float reduction unordered.
            let acc = match f.bit_width() {
                32 => C_undef(Type::f32(bx.cx)),
                64 => C_undef(Type::f64(bx.cx)),
                v => {
                    return_error!(
                        "unsupported {} from `{}` with element `{}` of size `{}` to `{}`",
                        "simd_reduce_add", in_ty, in_elem, v, ret_ty)
                }
            };
            Ok(bx.vector_reduce_fadd_fast(acc, args[0].immediate()))
        }
        _ => {
            return_error!("unsupported {} from `{}` with element `{}` to `{}`",
                          "simd_reduce_add", in_ty, in_elem, ret_ty)
        }
    }
}
if name == "simd_reduce_mul" {
    // The reduced value has the element type of the input vector.
    require!(ret_ty == in_elem,
             "expected return type `{}` (element of input `{}`), found `{}`",
             in_elem, in_ty, ret_ty);
    return match in_elem.sty {
        // Signed and unsigned integers lower to the same multiply reduction.
        ty::TyInt(_) | ty::TyUint(_) => Ok(bx.vector_reduce_mul(args[0].immediate())),
        ty::TyFloat(f) => {
            // An `undef` accumulator makes the float reduction unordered.
            let acc = match f.bit_width() {
                32 => C_undef(Type::f32(bx.cx)),
                64 => C_undef(Type::f64(bx.cx)),
                v => {
                    return_error!(
                        "unsupported {} from `{}` with element `{}` of size `{}` to `{}`",
                        "simd_reduce_mul", in_ty, in_elem, v, ret_ty)
                }
            };
            Ok(bx.vector_reduce_fmul_fast(acc, args[0].immediate()))
        }
        _ => {
            return_error!("unsupported {} from `{}` with element `{}` to `{}`",
                          "simd_reduce_mul", in_ty, in_elem, ret_ty)
        }
    }
}
if name == "simd_reduce_min" {
// The reduced value has the element type of the input vector.
require!(ret_ty == in_elem,
"expected return type `{}` (element of input `{}`), found `{}`",
in_elem, in_ty, ret_ty);
return match in_elem.sty {
ty::TyInt(_i) => {
// Signed minimum: is_signed = true.
Ok(bx.vector_reduce_min(args[0].immediate(), true))
},
ty::TyUint(_u) => {
// Unsigned minimum: is_signed = false.
Ok(bx.vector_reduce_min(args[0].immediate(), false))
},
ty::TyFloat(_f) => {
// Fast-math floating-point minimum reduction.
Ok(bx.vector_reduce_fmin_fast(args[0].immediate()))
}
_ => {
return_error!("unsupported {} from `{}` with element `{}` to `{}`",
"simd_reduce_min", in_ty, in_elem, ret_ty)
},
}
}
if name == "simd_reduce_max" {
// The reduced value has the element type of the input vector.
require!(ret_ty == in_elem,
"expected return type `{}` (element of input `{}`), found `{}`",
in_elem, in_ty, ret_ty);
return match in_elem.sty {
ty::TyInt(_i) => {
// Signed maximum: is_signed = true.
Ok(bx.vector_reduce_max(args[0].immediate(), true))
},
ty::TyUint(_u) => {
// Unsigned maximum: is_signed = false.
Ok(bx.vector_reduce_max(args[0].immediate(), false))
},
ty::TyFloat(_f) => {
// Fast-math floating-point maximum reduction.
Ok(bx.vector_reduce_fmax_fast(args[0].immediate()))
}
_ => {
return_error!("unsupported {} from `{}` with element `{}` to `{}`",
"simd_reduce_max", in_ty, in_elem, ret_ty)
},
}
}
if name == "simd_reduce_and" {
    // Bitwise reductions produce one element of the input's element type.
    require!(ret_ty == in_elem,
             "expected return type `{}` (element of input `{}`), found `{}`",
             in_elem, in_ty, ret_ty);
    return match in_elem.sty {
        // Signedness is irrelevant for a bitwise AND reduction.
        ty::TyInt(_) | ty::TyUint(_) => Ok(bx.vector_reduce_and(args[0].immediate())),
        _ => {
            return_error!("unsupported {} from `{}` with element `{}` to `{}`",
                          "simd_reduce_and", in_ty, in_elem, ret_ty)
        }
    }
}
if name == "simd_reduce_or" {
    // Bitwise reductions produce one element of the input's element type.
    require!(ret_ty == in_elem,
             "expected return type `{}` (element of input `{}`), found `{}`",
             in_elem, in_ty, ret_ty);
    return match in_elem.sty {
        // Signedness is irrelevant for a bitwise OR reduction.
        ty::TyInt(_) | ty::TyUint(_) => Ok(bx.vector_reduce_or(args[0].immediate())),
        _ => {
            return_error!("unsupported {} from `{}` with element `{}` to `{}`",
                          "simd_reduce_or", in_ty, in_elem, ret_ty)
        }
    }
}
if name == "simd_reduce_xor" {
    // Bitwise reductions produce one element of the input's element type.
    require!(ret_ty == in_elem,
             "expected return type `{}` (element of input `{}`), found `{}`",
             in_elem, in_ty, ret_ty);
    return match in_elem.sty {
        // Signedness is irrelevant for a bitwise XOR reduction.
        ty::TyInt(_) | ty::TyUint(_) => Ok(bx.vector_reduce_xor(args[0].immediate())),
        _ => {
            return_error!("unsupported {} from `{}` with element `{}` to `{}`",
                          "simd_reduce_xor", in_ty, in_elem, ret_ty)
        }
    }
}
if name == "simd_reduce_all" {
    // Boolean reductions return `bool`, not the vector's element type, so
    // the `ret_ty == in_elem` check used by the other reductions does not
    // apply here (the typeck table fixes the return type to `bool`).
    //
    // Mask vectors encode `true` as all-ones lanes and `false` as zero
    // lanes; truncating each lane to i1 and AND-reducing yields `true`
    // iff every lane is set.
    let i1 = Type::i1(bx.cx);
    let i1xn = Type::vector(&i1, in_len as u64);
    let v = bx.trunc(args[0].immediate(), i1xn);
    let red = match in_elem.sty {
        ty::TyInt(_) | ty::TyUint(_) => bx.vector_reduce_and(v),
        _ => {
            // Fixed: the diagnostic previously reported "simd_reduce_and"
            // instead of the intrinsic actually being compiled.
            return_error!("unsupported {} from `{}` with element `{}` to `{}`",
                          "simd_reduce_all", in_ty, in_elem, ret_ty)
        }
    };
    return Ok(bx.zext(red, Type::bool(bx.cx)));
}
if name == "simd_reduce_any" {
    // Boolean reductions return `bool`, not the vector's element type, so
    // the `ret_ty == in_elem` check used by the other reductions does not
    // apply here (the typeck table fixes the return type to `bool`).
    //
    // Truncate each mask lane to i1 and OR-reduce: the result is `true`
    // iff at least one lane is set.
    let i1 = Type::i1(bx.cx);
    let i1xn = Type::vector(&i1, in_len as u64);
    let v = bx.trunc(args[0].immediate(), i1xn);
    let red = match in_elem.sty {
        ty::TyInt(_) | ty::TyUint(_) => bx.vector_reduce_or(v),
        _ => {
            // Fixed: the diagnostic previously reported "simd_reduce_and"
            // instead of the intrinsic actually being compiled.
            return_error!("unsupported {} from `{}` with element `{}` to `{}`",
                          "simd_reduce_any", in_ty, in_elem, ret_ty)
        }
    };
    return Ok(bx.zext(red, Type::bool(bx.cx)));
}
if name == "simd_cast" {
require_simd!(ret_ty, "return");
let out_len = ret_ty.simd_size(tcx);
......
......@@ -361,6 +361,11 @@ pub fn check_platform_intrinsic_type<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
"simd_insert" => (2, vec![param(0), tcx.types.u32, param(1)], param(0)),
"simd_extract" => (2, vec![param(0), tcx.types.u32], param(1)),
"simd_cast" => (2, vec![param(0)], param(1)),
// Boolean reductions: one generic parameter (the mask vector), and the
// return type is always `bool`.
"simd_reduce_all" | "simd_reduce_any" => (1, vec![param(0)], tcx.types.bool),
// Arithmetic / bitwise / min-max reductions: vector in, scalar out; the
// scalar element type is the second generic parameter.
"simd_reduce_add" | "simd_reduce_mul" |
"simd_reduce_and" | "simd_reduce_or" | "simd_reduce_xor" |
"simd_reduce_min" | "simd_reduce_max"
=> (2, vec![param(0)], param(1)),
name if name.starts_with("simd_shuffle") => {
match name["simd_shuffle".len()..].parse() {
Ok(n) => {
......
......@@ -1395,3 +1395,49 @@ LLVMRustModuleCost(LLVMModuleRef M) {
auto f = unwrap(M)->functions();
return std::distance(std::begin(f), std::end(f));
}
// Vector reductions:
// Thin `extern "C"` wrappers over LLVM's IRBuilder reduction helpers so
// rustc can reach them through the C ABI (declared as `LLVMRust*` on the
// Rust side). Each simply unwraps the builder/values, builds the reduction,
// and wraps the resulting instruction.
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceFAdd(LLVMBuilderRef B, LLVMValueRef Acc, LLVMValueRef Src) {
return wrap(unwrap(B)->CreateFAddReduce(unwrap(Acc),unwrap(Src)));
}
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceFMul(LLVMBuilderRef B, LLVMValueRef Acc, LLVMValueRef Src) {
return wrap(unwrap(B)->CreateFMulReduce(unwrap(Acc),unwrap(Src)));
}
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceAdd(LLVMBuilderRef B, LLVMValueRef Src) {
return wrap(unwrap(B)->CreateAddReduce(unwrap(Src)));
}
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceMul(LLVMBuilderRef B, LLVMValueRef Src) {
return wrap(unwrap(B)->CreateMulReduce(unwrap(Src)));
}
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceAnd(LLVMBuilderRef B, LLVMValueRef Src) {
return wrap(unwrap(B)->CreateAndReduce(unwrap(Src)));
}
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceOr(LLVMBuilderRef B, LLVMValueRef Src) {
return wrap(unwrap(B)->CreateOrReduce(unwrap(Src)));
}
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceXor(LLVMBuilderRef B, LLVMValueRef Src) {
return wrap(unwrap(B)->CreateXorReduce(unwrap(Src)));
}
// IsSigned selects signed vs. unsigned integer min/max comparison.
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceMin(LLVMBuilderRef B, LLVMValueRef Src, bool IsSigned) {
return wrap(unwrap(B)->CreateIntMinReduce(unwrap(Src), IsSigned));
}
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceMax(LLVMBuilderRef B, LLVMValueRef Src, bool IsSigned) {
return wrap(unwrap(B)->CreateIntMaxReduce(unwrap(Src), IsSigned));
}
// NoNaN: the caller asserts Src contains no NaNs, letting LLVM pick a
// cheaper lowering — NOTE(review): confirm exact flag semantics against the
// LLVM version in use.
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceFMin(LLVMBuilderRef B, LLVMValueRef Src, bool NoNaN) {
return wrap(unwrap(B)->CreateFPMinReduce(unwrap(Src), NoNaN));
}
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceFMax(LLVMBuilderRef B, LLVMValueRef Src, bool NoNaN) {
return wrap(unwrap(B)->CreateFPMaxReduce(unwrap(Src), NoNaN));
}
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// Test that the simd_reduce_{op} intrinsics produce the correct results.
#![feature(repr_simd, platform_intrinsics)]
// 4-lane signed 32-bit integer vector.
#[allow(non_camel_case_types)]
#[repr(simd)]
#[derive(Copy, Clone)]
struct i32x4(pub i32, pub i32, pub i32, pub i32);
// 4-lane unsigned 32-bit integer vector.
#[repr(simd)]
#[derive(Copy, Clone)]
struct u32x4(pub u32, pub u32, pub u32, pub u32);
// 4-lane single-precision float vector.
#[repr(simd)]
#[derive(Copy, Clone)]
struct f32x4(pub f32, pub f32, pub f32, pub f32);
// 4-lane boolean mask vector: lanes are all-ones (!0 = true) or 0 (false).
#[repr(simd)]
#[derive(Copy, Clone)]
struct b8x4(pub i8, pub i8, pub i8, pub i8);
// 16-lane boolean mask vector (same lane encoding as b8x4).
#[repr(simd)]
#[derive(Copy, Clone)]
struct b8x16(
pub i8, pub i8, pub i8, pub i8,
pub i8, pub i8, pub i8, pub i8,
pub i8, pub i8, pub i8, pub i8,
pub i8, pub i8, pub i8, pub i8
);
// Compiler-implemented platform intrinsics (see generic_simd_intrinsic):
// each reduction collapses a SIMD vector `T` to a scalar `U`; `all`/`any`
// reduce a mask vector to a single `bool`.
extern "platform-intrinsic" {
fn simd_reduce_add<T, U>(x: T) -> U;
fn simd_reduce_mul<T, U>(x: T) -> U;
fn simd_reduce_min<T, U>(x: T) -> U;
fn simd_reduce_max<T, U>(x: T) -> U;
fn simd_reduce_and<T, U>(x: T) -> U;
fn simd_reduce_or<T, U>(x: T) -> U;
fn simd_reduce_xor<T, U>(x: T) -> U;
fn simd_reduce_all<T>(x: T) -> bool;
fn simd_reduce_any<T>(x: T) -> bool;
}
// Exercises every simd_reduce_{op} intrinsic and checks the exact results.
fn main() {
    // Signed-integer reductions.
    unsafe {
        let x = i32x4(1, -2, 3, 4);
        let r: i32 = simd_reduce_add(x);
        assert!(r == 6_i32);
        let r: i32 = simd_reduce_mul(x);
        assert!(r == -24_i32);
        let r: i32 = simd_reduce_min(x);
        // Fixed: min(1, -2, 3, 4) is -2; the previous expected value (-21)
        // can never be produced, so this assert always failed at runtime.
        assert!(r == -2_i32);
        let r: i32 = simd_reduce_max(x);
        assert!(r == 4_i32);

        let x = i32x4(-1, -1, -1, -1);
        let r: i32 = simd_reduce_and(x);
        assert!(r == -1_i32);
        let r: i32 = simd_reduce_or(x);
        assert!(r == -1_i32);
        let r: i32 = simd_reduce_xor(x);
        assert!(r == 0_i32);

        let x = i32x4(-1, -1, 0, -1);
        let r: i32 = simd_reduce_and(x);
        assert!(r == 0_i32);
        let r: i32 = simd_reduce_or(x);
        assert!(r == -1_i32);
        let r: i32 = simd_reduce_xor(x);
        assert!(r == -1_i32);
    }

    // Unsigned-integer reductions.
    unsafe {
        let x = u32x4(1, 2, 3, 4);
        let r: u32 = simd_reduce_add(x);
        assert!(r == 10_u32);
        let r: u32 = simd_reduce_mul(x);
        assert!(r == 24_u32);
        let r: u32 = simd_reduce_min(x);
        assert!(r == 1_u32);
        let r: u32 = simd_reduce_max(x);
        assert!(r == 4_u32);

        let t = u32::max_value();
        let x = u32x4(t, t, t, t);
        let r: u32 = simd_reduce_and(x);
        assert!(r == t);
        let r: u32 = simd_reduce_or(x);
        assert!(r == t);
        let r: u32 = simd_reduce_xor(x);
        assert!(r == 0_u32);

        let x = u32x4(t, t, 0, t);
        let r: u32 = simd_reduce_and(x);
        assert!(r == 0_u32);
        let r: u32 = simd_reduce_or(x);
        assert!(r == t);
        let r: u32 = simd_reduce_xor(x);
        assert!(r == t);
    }

    // Floating-point reductions.
    unsafe {
        let x = f32x4(1., -2., 3., 4.);
        let r: f32 = simd_reduce_add(x);
        assert!(r == 6_f32);
        let r: f32 = simd_reduce_mul(x);
        assert!(r == -24_f32);
        let r: f32 = simd_reduce_min(x);
        assert!(r == -2_f32);
        let r: f32 = simd_reduce_max(x);
        assert!(r == 4_f32);
    }

    // Boolean (mask) reductions: a lane is true iff it is all-ones (!0).
    unsafe {
        let x = b8x4(!0, !0, !0, !0);
        let r: bool = simd_reduce_all(x);
        assert!(r);
        let r: bool = simd_reduce_any(x);
        assert!(r);

        let x = b8x4(!0, !0, 0, !0);
        let r: bool = simd_reduce_all(x);
        assert!(!r);
        let r: bool = simd_reduce_any(x);
        assert!(r);

        let x = b8x4(0, 0, 0, 0);
        let r: bool = simd_reduce_all(x);
        assert!(!r);
        let r: bool = simd_reduce_any(x);
        assert!(!r);
    }
}
Markdown is supported.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register to comment.