powerpc64: add single-instruction math functions

while the official elfv2 abi for "powerpc64le" sets power8 as the baseline isa, we use that abi for both little- and big-endian powerpc64 targets and need to maintain compatibility with pre-power8 models. the instructions for sqrt, fabs, and fma are in the baseline isa; support for the rest is conditional via predefined isa-level macros. patch by David Edelsohn.

powerpc64: add single-instruction math functions
while the official elfv2 abi for "powerpc64le" sets power8 as the baseline isa, we use that abi for both little- and big-endian powerpc64 targets and need to maintain compatibility with pre-power8 models. the instructions for sqrt, fabs, and fma are in the baseline isa; support for the rest is conditional via predefined isa-level macros. patch by David Edelsohn.
94f74419 · Rich Felker · 9d12a6a2 · 94f74419 · 94f74419 · 94f74419
22 changed files
--- a/src/math/powerpc64/ceil.c
+++ b/src/math/powerpc64/ceil.c
+#include <math.h>
+
+#ifdef _ARCH_PWR5X
+
+double ceil(double x) /* ceil as one frip instruction; compiled only when _ARCH_PWR5X is defined, otherwise ../ceil.c is included instead */
+{
+	__asm__ ("frip %0, %1" : "=d"(x) : "d"(x)); /* operand 1 is the incoming x, operand 0 writes the result back into x; both use the "d" constraint */
+	return x;
+}
+
+#else
+
+#include "../ceil.c"
+
+#endif
--- a/src/math/powerpc64/ceilf.c
+++ b/src/math/powerpc64/ceilf.c
+#include <math.h>
+
+#ifdef _ARCH_PWR5X
+
+float ceilf(float x) /* float ceil as one frip instruction; compiled only when _ARCH_PWR5X is defined, otherwise ../ceilf.c is included instead */
+{
+	__asm__ ("frip %0, %1" : "=f"(x) : "f"(x)); /* same frip mnemonic as ceil(), with the "f" constraint on the float operands; result overwrites x */
+	return x;
+}
+
+#else
+
+#include "../ceilf.c"
+
+#endif
--- a/src/math/powerpc64/fabs.c
+++ b/src/math/powerpc64/fabs.c
+#include <math.h>
+
+double fabs(double x) /* fabs as one fabs instruction; no ISA-level guard here (the commit message lists fabs as part of the baseline isa) */
+{
+	__asm__ ("fabs %0, %1" : "=d"(x) : "d"(x)); /* operand 1 is the incoming x, operand 0 writes the result back into x */
+	return x;
+}
--- a/src/math/powerpc64/fabsf.c
+++ b/src/math/powerpc64/fabsf.c
+#include <math.h>
+
+float fabsf(float x) /* float fabs as one fabs instruction; no ISA-level guard here (the commit message lists fabs as part of the baseline isa) */
+{
+	__asm__ ("fabs %0, %1" : "=f"(x) : "f"(x)); /* same fabs mnemonic as the double version, with the "f" constraint; result overwrites x */
+	return x;
+}
--- a/src/math/powerpc64/floor.c
+++ b/src/math/powerpc64/floor.c
+#include <math.h>
+
+#ifdef _ARCH_PWR5X
+
+double floor(double x) /* floor as one frim instruction; compiled only when _ARCH_PWR5X is defined, otherwise ../floor.c is included instead */
+{
+	__asm__ ("frim %0, %1" : "=d"(x) : "d"(x)); /* operand 1 is the incoming x, operand 0 writes the result back into x; both use the "d" constraint */
+	return x;
+}
+
+#else
+
+#include "../floor.c"
+
+#endif
--- a/src/math/powerpc64/floorf.c
+++ b/src/math/powerpc64/floorf.c
+#include <math.h>
+
+#ifdef _ARCH_PWR5X
+
+float floorf(float x) /* float floor as one frim instruction; compiled only when _ARCH_PWR5X is defined, otherwise ../floorf.c is included instead */
+{
+	__asm__ ("frim %0, %1" : "=f"(x) : "f"(x)); /* same frim mnemonic as floor(), with the "f" constraint on the float operands; result overwrites x */
+	return x;
+}
+
+#else
+
+#include "../floorf.c"
+
+#endif
--- a/src/math/powerpc64/fma.c
+++ b/src/math/powerpc64/fma.c
+#include <math.h>
+
+double fma(double x, double y, double z) /* fma as one fmadd instruction; no ISA-level guard here (the commit message lists fma as part of the baseline isa) */
+{
+	__asm__ ("fmadd %0, %1, %2, %3" : "=d"(x) : "d"(x), "d"(y), "d"(z)); /* inputs are passed in the order x, y, z; the result overwrites x */
+	return x;
+}
--- a/src/math/powerpc64/fmaf.c
+++ b/src/math/powerpc64/fmaf.c
+#include <math.h>
+
+float fmaf(float x, float y, float z) /* float fma as one fmadds instruction; no ISA-level guard here (the commit message lists fma as part of the baseline isa) */
+{
+	__asm__ ("fmadds %0, %1, %2, %3" : "=f"(x) : "f"(x), "f"(y), "f"(z)); /* inputs are passed in the order x, y, z; the result overwrites x */
+	return x;
+}
--- a/src/math/powerpc64/fmax.c
+++ b/src/math/powerpc64/fmax.c
+#include <math.h>
+
+#ifdef __VSX__
+
+double fmax(double x, double y) /* fmax as one xsmaxdp instruction; compiled only when __VSX__ is defined, otherwise ../fmax.c is included instead */
+{
+	__asm__ ("xsmaxdp %x0, %x1, %x2" : "=ws"(x) : "ws"(x), "ws"(y)); /* operands use the "ws" constraint and are printed with the %x modifier; the result overwrites x */
+	return x;
+}
+
+#else
+
+#include "../fmax.c"
+
+#endif
--- a/src/math/powerpc64/fmaxf.c
+++ b/src/math/powerpc64/fmaxf.c
+#include <math.h>
+
+#ifdef __VSX__
+
+float fmaxf(float x, float y) /* float fmax as one xsmaxdp instruction; compiled only when __VSX__ is defined, otherwise ../fmaxf.c is included instead */
+{
+	__asm__ ("xsmaxdp %x0, %x1, %x2" : "=ww"(x) : "ww"(x), "ww"(y)); /* same xsmaxdp mnemonic as fmax(), but with the "ww" constraint on the float operands; the result overwrites x */
+	return x;
+}
+
+#else
+
+#include "../fmaxf.c"
+
+#endif
--- a/src/math/powerpc64/fmin.c
+++ b/src/math/powerpc64/fmin.c
+#include <math.h>
+
+#ifdef __VSX__
+
+double fmin(double x, double y) /* fmin as one xsmindp instruction; compiled only when __VSX__ is defined, otherwise ../fmin.c is included instead */
+{
+	__asm__ ("xsmindp %x0, %x1, %x2" : "=ws"(x) : "ws"(x), "ws"(y)); /* operands use the "ws" constraint and are printed with the %x modifier; the result overwrites x */
+	return x;
+}
+
+#else
+
+#include "../fmin.c"
+
+#endif
--- a/src/math/powerpc64/fminf.c
+++ b/src/math/powerpc64/fminf.c
+#include <math.h>
+
+#ifdef __VSX__
+
+float fminf(float x, float y) /* float fmin as one xsmindp instruction; compiled only when __VSX__ is defined, otherwise ../fminf.c is included instead */
+{
+	__asm__ ("xsmindp %x0, %x1, %x2" : "=ww"(x) : "ww"(x), "ww"(y)); /* same xsmindp mnemonic as fmin(), but with the "ww" constraint on the float operands; the result overwrites x */
+	return x;
+}
+
+#else
+
+#include "../fminf.c"
+
+#endif
--- a/src/math/powerpc64/lrint.c
+++ b/src/math/powerpc64/lrint.c
+#include <math.h>
+
+#ifdef _ARCH_PWR5X
+
+long lrint(double x) /* lrint as one fctid instruction; compiled only when _ARCH_PWR5X is defined, otherwise ../lrint.c is included instead */
+{
+	long n; /* receives the fctid result; assigned only by the asm statement below */
+	__asm__ ("fctid %0, %1" : "=d"(n) : "d"(x)); /* the long output n is bound with the same "d" constraint as the double input x */
+	return n;
+}
+
+#else
+
+#include "../lrint.c"
+
+#endif
--- a/src/math/powerpc64/lrintf.c
+++ b/src/math/powerpc64/lrintf.c
+#include <math.h>
+
+#ifdef _ARCH_PWR5X
+
+long lrintf(float x) /* float lrint as one fctid instruction; compiled only when _ARCH_PWR5X is defined, otherwise ../lrintf.c is included instead */
+{
+	long n; /* receives the fctid result; assigned only by the asm statement below */
+	__asm__ ("fctid %0, %1" : "=d"(n) : "f"(x)); /* same fctid mnemonic as lrint(); the float input uses "f", the long output uses "d" */
+	return n;
+}
+
+#else
+
+#include "../lrintf.c"
+
+#endif
--- a/src/math/powerpc64/lround.c
+++ b/src/math/powerpc64/lround.c
+#include <math.h>
+
+#ifdef __VSX__
+
+long lround(double x) /* lround as a two-instruction sequence (xsrdpi then fctid); compiled only when __VSX__ is defined, otherwise ../lround.c is included instead */
+{
+	long n; /* receives the fctid result; assigned only by the asm statement below */
+	__asm__ (
+		"xsrdpi %1, %1\n" /* first step rewrites x in place; NOTE(review): presumably rounds x to an integral value in lround's rounding mode — confirm against the Power ISA */
+		"fctid %0, %1\n" : "=d"(n), "+d"(x)); /* second step converts the updated x into n; x is a read-write ("+d") operand because the first step modifies it */
+	return n;
+}
+
+#else
+
+#include "../lround.c"
+
+#endif
--- a/src/math/powerpc64/lroundf.c
+++ b/src/math/powerpc64/lroundf.c
+#include <math.h>
+
+#ifdef __VSX__
+
+long lroundf(float x) /* float lround as a two-instruction sequence (xsrdpi then fctid); compiled only when __VSX__ is defined, otherwise ../lroundf.c is included instead */
+{
+	long n; /* receives the fctid result; assigned only by the asm statement below */
+	__asm__ (
+		"xsrdpi %1, %1\n" /* first step rewrites x in place; NOTE(review): presumably rounds x to an integral value in lroundf's rounding mode — confirm against the Power ISA */
+		"fctid %0, %1\n" : "=d"(n), "+f"(x)); /* second step converts the updated x into n; x is a read-write ("+f") operand because the first step modifies it */
+	return n;
+}
+
+#else
+
+#include "../lroundf.c"
+
+#endif
--- a/src/math/powerpc64/round.c
+++ b/src/math/powerpc64/round.c
+#include <math.h>
+
+#ifdef _ARCH_PWR5X
+
+double round(double x) /* round as one frin instruction; compiled only when _ARCH_PWR5X is defined, otherwise ../round.c is included instead */
+{
+	__asm__ ("frin %0, %1" : "=d"(x) : "d"(x)); /* operand 1 is the incoming x, operand 0 writes the result back into x; both use the "d" constraint */
+	return x;
+}
+
+#else
+
+#include "../round.c"
+
+#endif
--- a/src/math/powerpc64/roundf.c
+++ b/src/math/powerpc64/roundf.c
+#include <math.h>
+
+#ifdef _ARCH_PWR5X
+
+float roundf(float x) /* float round as one frin instruction; compiled only when _ARCH_PWR5X is defined, otherwise ../roundf.c is included instead */
+{
+	__asm__ ("frin %0, %1" : "=f"(x) : "f"(x)); /* same frin mnemonic as round(), with the "f" constraint on the float operands; result overwrites x */
+	return x;
+}
+
+#else
+
+#include "../roundf.c"
+
+#endif
--- a/src/math/powerpc64/sqrt.c
+++ b/src/math/powerpc64/sqrt.c
+#include <math.h>
+
+double sqrt(double x) /* sqrt as one fsqrt instruction; no ISA-level guard here (the commit message lists sqrt as part of the baseline isa) */
+{
+	__asm__ ("fsqrt %0, %1" : "=d"(x) : "d"(x)); /* operand 1 is the incoming x, operand 0 writes the result back into x */
+	return x;
+}
--- a/src/math/powerpc64/sqrtf.c
+++ b/src/math/powerpc64/sqrtf.c
+#include <math.h>
+
+float sqrtf(float x) /* float sqrt as one fsqrts instruction; no ISA-level guard here (the commit message lists sqrt as part of the baseline isa) */
+{
+	__asm__ ("fsqrts %0, %1" : "=f"(x) : "f"(x)); /* uses the fsqrts mnemonic (not fsqrt as in the double version) with the "f" constraint; result overwrites x */
+	return x;
+}
--- a/src/math/powerpc64/trunc.c
+++ b/src/math/powerpc64/trunc.c
+#include <math.h>
+
+#ifdef _ARCH_PWR5X
+
+double trunc(double x) /* trunc as one friz instruction; compiled only when _ARCH_PWR5X is defined, otherwise ../trunc.c is included instead */
+{
+	__asm__ ("friz %0, %1" : "=d"(x) : "d"(x)); /* operand 1 is the incoming x, operand 0 writes the result back into x; both use the "d" constraint */
+	return x;
+}
+
+#else
+
+#include "../trunc.c"
+
+#endif
--- a/src/math/powerpc64/truncf.c
+++ b/src/math/powerpc64/truncf.c
+#include <math.h>
+
+#ifdef _ARCH_PWR5X
+
+float truncf(float x) /* float trunc as one friz instruction; compiled only when _ARCH_PWR5X is defined, otherwise ../truncf.c is included instead */
+{
+	__asm__ ("friz %0, %1" : "=f"(x) : "f"(x)); /* same friz mnemonic as trunc(), with the "f" constraint on the float operands; result overwrites x */
+	return x;
+}
+
+#else
+
+#include "../truncf.c"
+
+#endif