提交 54807d47 编写于 作者: S Szabolcs Nagy 提交者: Rich Felker

aarch64: add single instruction math functions

this should increase performance and reduce code size on aarch64.

the compiled code was checked against the code generated when using
__builtin_* instead of inline asm, with gcc-6.2.0.

lrint is two instructions.

c with inline asm is used because it is safer than a pure asm
implementation; this prevents ll{rint,round} from being an alias
of l{rint,round} (because the types don't match) and depends
on gcc style inline asm support.

ceil, floor, round, trunc can either raise inexact on finite
non-integer inputs or not raise any exceptions. the new
implementation does not raise exceptions while the generic
c code does.

on aarch64, the underflow exception is signaled before rounding
(ieee 754 allows both before and after rounding, but it must be
consistent); the generic fma c code signals it after rounding,
so using a single instruction fixes a slight conformance issue too.
上级 b6e1fe0d
#include <math.h>
double ceil(double x)
{
__asm__ ("frintp %d0, %d1" : "=w"(x) : "w"(x));
return x;
}
#include <math.h>
float ceilf(float x)
{
__asm__ ("frintp %s0, %s1" : "=w"(x) : "w"(x));
return x;
}
#include <math.h>
double fabs(double x)
{
__asm__ ("fabs %d0, %d1" : "=w"(x) : "w"(x));
return x;
}
/*
 * fabs: pure-assembly version of the double-precision absolute value.
 * Exports a global function symbol named fabs whose body is a single
 * FABS followed by a return.
 */
.text
.global fabs
.type fabs,%function
fabs:
/* d0 is both source and destination (presumably the argument and
 * return register under the AArch64 calling convention). */
fabs d0, d0
ret
#include <math.h>
float fabsf(float x)
{
__asm__ ("fabs %s0, %s1" : "=w"(x) : "w"(x));
return x;
}
/*
 * fabsf: pure-assembly version of the single-precision absolute value.
 * Exports a global function symbol named fabsf whose body is a single
 * FABS followed by a return.
 */
.text
.global fabsf
.type fabsf,%function
fabsf:
/* s0 is both source and destination (presumably the argument and
 * return register under the AArch64 calling convention). */
fabs s0, s0
ret
#include <math.h>
double floor(double x)
{
__asm__ ("frintm %d0, %d1" : "=w"(x) : "w"(x));
return x;
}
#include <math.h>
float floorf(float x)
{
__asm__ ("frintm %s0, %s1" : "=w"(x) : "w"(x));
return x;
}
#include <math.h>
double fma(double x, double y, double z)
{
__asm__ ("fmadd %d0, %d1, %d2, %d3" : "=w"(x) : "w"(x), "w"(y), "w"(z));
return x;
}
#include <math.h>
float fmaf(float x, float y, float z)
{
__asm__ ("fmadd %s0, %s1, %s2, %s3" : "=w"(x) : "w"(x), "w"(y), "w"(z));
return x;
}
#include <math.h>
double fmax(double x, double y)
{
__asm__ ("fmaxnm %d0, %d1, %d2" : "=w"(x) : "w"(x), "w"(y));
return x;
}
#include <math.h>
float fmaxf(float x, float y)
{
__asm__ ("fmaxnm %s0, %s1, %s2" : "=w"(x) : "w"(x), "w"(y));
return x;
}
#include <math.h>
double fmin(double x, double y)
{
__asm__ ("fminnm %d0, %d1, %d2" : "=w"(x) : "w"(x), "w"(y));
return x;
}
#include <math.h>
float fminf(float x, float y)
{
__asm__ ("fminnm %s0, %s1, %s2" : "=w"(x) : "w"(x), "w"(y));
return x;
}
#include <math.h>
long long llrint(double x)
{
long long n;
__asm__ (
"frintx %d1, %d1\n"
"fcvtzs %x0, %d1\n" : "=r"(n), "+w"(x));
return n;
}
#include <math.h>
long long llrintf(float x)
{
long long n;
__asm__ (
"frintx %s1, %s1\n"
"fcvtzs %x0, %s1\n" : "=r"(n), "+w"(x));
return n;
}
#include <math.h>
long long llround(double x)
{
long long n;
__asm__ ("fcvtas %x0, %d1" : "=r"(n) : "w"(x));
return n;
}
#include <math.h>
long long llroundf(float x)
{
long long n;
__asm__ ("fcvtas %x0, %s1" : "=r"(n) : "w"(x));
return n;
}
#include <math.h>
long lrint(double x)
{
long n;
__asm__ (
"frintx %d1, %d1\n"
"fcvtzs %x0, %d1\n" : "=r"(n), "+w"(x));
return n;
}
#include <math.h>
long lrintf(float x)
{
long n;
__asm__ (
"frintx %s1, %s1\n"
"fcvtzs %x0, %s1\n" : "=r"(n), "+w"(x));
return n;
}
#include <math.h>
long lround(double x)
{
long n;
__asm__ ("fcvtas %x0, %d1" : "=r"(n) : "w"(x));
return n;
}
#include <math.h>
long lroundf(float x)
{
long n;
__asm__ ("fcvtas %x0, %s1" : "=r"(n) : "w"(x));
return n;
}
#include <math.h>
double nearbyint(double x)
{
__asm__ ("frinti %d0, %d1" : "=w"(x) : "w"(x));
return x;
}
#include <math.h>
float nearbyintf(float x)
{
__asm__ ("frinti %s0, %s1" : "=w"(x) : "w"(x));
return x;
}
#include <math.h>
double rint(double x)
{
__asm__ ("frintx %d0, %d1" : "=w"(x) : "w"(x));
return x;
}
#include <math.h>
float rintf(float x)
{
__asm__ ("frintx %s0, %s1" : "=w"(x) : "w"(x));
return x;
}
#include <math.h>
double round(double x)
{
__asm__ ("frinta %d0, %d1" : "=w"(x) : "w"(x));
return x;
}
#include <math.h>
float roundf(float x)
{
__asm__ ("frinta %s0, %s1" : "=w"(x) : "w"(x));
return x;
}
#include <math.h>
double sqrt(double x)
{
__asm__ ("fsqrt %d0, %d1" : "=w"(x) : "w"(x));
return x;
}
/*
 * sqrt: pure-assembly version of the double-precision square root.
 * Exports a global function symbol named sqrt whose body is a single
 * FSQRT followed by a return.
 */
.text
.global sqrt
.type sqrt,%function
sqrt:
/* d0 is both source and destination (presumably the argument and
 * return register under the AArch64 calling convention). */
fsqrt d0, d0
ret
#include <math.h>
float sqrtf(float x)
{
__asm__ ("fsqrt %s0, %s1" : "=w"(x) : "w"(x));
return x;
}
/*
 * sqrtf: pure-assembly version of the single-precision square root.
 * Exports a global function symbol named sqrtf whose body is a single
 * FSQRT followed by a return.
 */
.text
.global sqrtf
.type sqrtf,%function
sqrtf:
/* s0 is both source and destination (presumably the argument and
 * return register under the AArch64 calling convention). */
fsqrt s0, s0
ret
#include <math.h>
double trunc(double x)
{
__asm__ ("frintz %d0, %d1" : "=w"(x) : "w"(x));
return x;
}
#include <math.h>
float truncf(float x)
{
__asm__ ("frintz %s0, %s1" : "=w"(x) : "w"(x));
return x;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册