word-at-a-time.h 4.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
#ifndef _ASM_WORD_AT_A_TIME_H
#define _ASM_WORD_AT_A_TIME_H

/*
 * Word-at-a-time interfaces for PowerPC.
 */

#include <linux/kernel.h>
#include <asm/asm-compat.h>

11 12
#ifdef __BIG_ENDIAN__

13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
struct word_at_a_time {
	const unsigned long high_bits, low_bits;
};

#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0xfe) + 1, REPEAT_BYTE(0x7f) }

/* Bit set in the bytes that have a zero */
static inline long prep_zero_mask(unsigned long val, unsigned long rhs, const struct word_at_a_time *c)
{
	unsigned long mask = (val & c->low_bits) + c->low_bits;
	return ~(mask | rhs);
}

#define create_zero_mask(mask) (mask)

static inline long find_zero(unsigned long mask)
{
	long leading_zero_bits;

	asm (PPC_CNTLZL "%0,%1" : "=r" (leading_zero_bits) : "r" (mask));
	return leading_zero_bits >> 3;
}

static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
{
	unsigned long rhs = val | c->low_bits;
	*data = rhs;
	return (val + c->high_bits) & ~rhs;
}

43 44 45 46 47
static inline unsigned long zero_bytemask(unsigned long mask)
{
	return ~1ul << __fls(mask);
}

48 49
#else

50 51 52
#ifdef CONFIG_64BIT

/* unused */
53 54 55
struct word_at_a_time {
};

56
#define WORD_AT_A_TIME_CONSTANTS { }
57

58 59 60 61 62
/* This will give us 0xff for a NULL char and 0x00 elsewhere */
static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
{
	unsigned long ret;
	unsigned long zero = 0;
63

64 65
	asm("cmpb %0,%1,%2" : "=r" (ret) : "r" (a), "r" (zero));
	*bits = ret;
66

67 68 69 70 71 72 73 74 75 76
	return ret;
}

static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
{
	return bits;
}

/* Alan Modra's little-endian strlen tail for 64-bit */
static inline unsigned long create_zero_mask(unsigned long bits)
77
{
78 79 80
	unsigned long leading_zero_bits;
	long trailing_zero_bit_mask;

81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
	asm("addi	%1,%2,-1\n\t"
	    "andc	%1,%1,%2\n\t"
	    "popcntd	%0,%1"
		: "=r" (leading_zero_bits), "=&r" (trailing_zero_bit_mask)
		: "r" (bits));

	return leading_zero_bits;
}

static inline unsigned long find_zero(unsigned long mask)
{
	return mask >> 3;
}

/* This assumes that we never ask for an all 1s bitmask */
static inline unsigned long zero_bytemask(unsigned long mask)
{
	return (1UL << mask) - 1;
99 100 101 102
}

#else	/* 32-bit case */

103 104 105 106 107 108
struct word_at_a_time {
	const unsigned long one_bits, high_bits;
};

#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }

109 110 111 112 113 114 115 116
/*
 * This is largely generic for little-endian machines, but the
 * optimal byte mask counting is probably going to be something
 * that is architecture-specific. If you have a reliably fast
 * bit count instruction, that might be better than the multiply
 * and shift, for example.
 */

117 118 119 120 121 122 123 124 125
/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
static inline long count_masked_bytes(long mask)
{
	/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
	long a = (0x0ff0001+mask) >> 23;
	/* Fix the 1 for 00 case */
	return a & mask;
}

126 127 128 129 130 131 132 133 134 135 136
static inline unsigned long create_zero_mask(unsigned long bits)
{
	bits = (bits - 1) & ~bits;
	return bits >> 7;
}

static inline unsigned long find_zero(unsigned long mask)
{
	return count_masked_bytes(mask);
}

137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
/* Return nonzero if it has a zero */
static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
{
	unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits;
	*bits = mask;
	return mask;
}

static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
{
	return bits;
}

/* The mask we created is directly usable as a bytemask */
#define zero_bytemask(mask) (mask)

153 154 155
#endif /* CONFIG_64BIT */

#endif /* __BIG_ENDIAN__ */
156

157 158 159 160 161 162 163 164 165
/*
 * We use load_unaligned_zero() in a selftest, which builds a userspace
 * program. Some linker scripts seem to discard the .fixup section, so allow
 * the test code to use a different section name.
 */
#ifndef FIXUP_SECTION
#define FIXUP_SECTION ".fixup"
#endif

166 167 168 169 170 171 172
static inline unsigned long load_unaligned_zeropad(const void *addr)
{
	unsigned long ret, offset, tmp;

	asm(
	"1:	" PPC_LL "%[ret], 0(%[addr])\n"
	"2:\n"
173
	".section " FIXUP_SECTION ",\"ax\"\n"
174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205
	"3:	"
#ifdef __powerpc64__
	"clrrdi		%[tmp], %[addr], 3\n\t"
	"clrlsldi	%[offset], %[addr], 61, 3\n\t"
	"ld		%[ret], 0(%[tmp])\n\t"
#ifdef __BIG_ENDIAN__
	"sld		%[ret], %[ret], %[offset]\n\t"
#else
	"srd		%[ret], %[ret], %[offset]\n\t"
#endif
#else
	"clrrwi		%[tmp], %[addr], 2\n\t"
	"clrlslwi	%[offset], %[addr], 30, 3\n\t"
	"lwz		%[ret], 0(%[tmp])\n\t"
#ifdef __BIG_ENDIAN__
	"slw		%[ret], %[ret], %[offset]\n\t"
#else
	"srw		%[ret], %[ret], %[offset]\n\t"
#endif
#endif
	"b	2b\n"
	".previous\n"
	".section __ex_table,\"a\"\n\t"
		PPC_LONG_ALIGN "\n\t"
		PPC_LONG "1b,3b\n"
	".previous"
	: [tmp] "=&b" (tmp), [offset] "=&r" (offset), [ret] "=&r" (ret)
	: [addr] "b" (addr), "m" (*(unsigned long *)addr));

	return ret;
}

206 207
#undef FIXUP_SECTION

208
#endif /* _ASM_WORD_AT_A_TIME_H */