/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Quick'n'dirty IP checksum ...
 *
 * Copyright (C) 1998, 1999 Ralf Baechle
 * Copyright (C) 1999 Silicon Graphics, Inc.
 */
#include <asm/asm.h>
#include <asm/regdef.h>

#ifdef CONFIG_64BIT
/*
 * As we are sharing code base with the mips32 tree (which uses the o32 ABI
 * register definitions), we need to redefine the register definitions from
 * the n64 ABI register naming to the o32 ABI register naming: t0-t7 are
 * mapped onto $8-$15.
 *
 * NOTE(review): only t0-t3 are #undef'd before being redefined; presumably
 * the n64 <asm/regdef.h> does not define t4-t7 -- confirm against the header.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0	$8
#define t1	$9
#define t2	$10
#define t3	$11
#define t4	$12
#define t5	$13
#define t6	$14
#define t7	$15
#endif

/*
 * ADDC(sum, reg): sum += reg with end-around carry.
 *
 * After the addition, v1 is set to 1 if the result is (unsigned) smaller
 * than reg, i.e. the addition wrapped, and that carry bit is then added
 * back into sum.  v1 is overwritten as scratch.
 */
#define ADDC(sum,reg)						\
	addu	sum, reg;					\
	sltu	v1, sum, reg;					\
	addu	sum, v1

/*
 * CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)
 *
 * Add the eight 32-bit words at (offset + 0x00)(src) .. (offset + 0x1c)(src)
 * into sum using ADDC (end-around carry).  The words are loaded four at a
 * time into the scratch registers _t0.._t3, which are overwritten; ADDC
 * additionally overwrites v1.  src itself is not advanced.
 *
 * The last line deliberately has no trailing line continuation so the macro
 * body ends here and cannot absorb whatever line follows it.
 */
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
	lw	_t0, (offset + 0x00)(src);			\
	lw	_t1, (offset + 0x04)(src);			\
	lw	_t2, (offset + 0x08)(src);			\
	lw	_t3, (offset + 0x0c)(src);			\
	ADDC(sum, _t0);						\
	ADDC(sum, _t1);						\
	ADDC(sum, _t2);						\
	ADDC(sum, _t3);						\
	lw	_t0, (offset + 0x10)(src);			\
	lw	_t1, (offset + 0x14)(src);			\
	lw	_t2, (offset + 0x18)(src);			\
	lw	_t3, (offset + 0x1c)(src);			\
	ADDC(sum, _t0);						\
	ADDC(sum, _t1);						\
	ADDC(sum, _t2);						\
	ADDC(sum, _t3)

/*
 * a0: source address
 * a1: length of the area to checksum
 * a2: partial checksum
 */

/* Symbolic names: src is the source pointer (a0), sum the accumulator (v0). */
#define src a0
#define sum v0

	.text
	/*
	 * Branch delay slots below are filled by hand; the instruction sitting
	 * in a delay slot is marked with one extra leading space.
	 */
	.set	noreorder

/*
 * small_csumcpy: unknown src alignment and < 8 bytes to go.
 *
 * Sums the remaining word / halfword / byte selected by bits 2, 1 and 0 of
 * the byte count, folds the 32-bit accumulator to 16 bits, byte-swaps the
 * result if the buffer started on an odd address, adds the caller's partial
 * checksum and returns.
 *
 * In:	src (a0) = current source pointer
 *	t2	 = number of bytes left (only bits 2..0 are examined)
 *	sum (v0) = running sum
 *	t7	 = non-zero if the original buffer address was odd
 *	a2	 = partial checksum passed by the caller
 * Out:	v0	 = folded sum plus a2 (with end-around carry)
 * Overwrites: a0, a1, t0, t1, v1 and, when a trailing byte is read, t2.
 */
small_csumcpy:
	/* Both branches into here already leave a1 == t2; kept as in the original. */
	move	a1, t2

	andi	t0, a1, 4
	beqz	t0, 1f
	 andi	t0, a1, 2		/* delay slot: halfword left? */

	/* Still a full word to go  */
	ulw	t1, (src)
	PTR_ADDIU	src, 4
	ADDC(sum, t1)

1:	move	t1, zero
	beqz	t0, 1f
	 andi	t0, a1, 1		/* delay slot: single byte left? */

	/* Still a halfword to go  */
	ulhu	t1, (src)
	PTR_ADDIU	src, 2

1:	beqz	t0, 1f
	 sll	t1, t1, 16		/* delay slot: halfword into bits 31..16 */

	lbu	t2, (src)
	 nop				/* presumably a load delay slot -- confirm */

#ifdef __MIPSEB__
	sll	t2, t2, 8
#endif
	or	t1, t2

1:	ADDC(sum, t1)

	/* fold checksum */
	sll	v1, sum, 16
	addu	sum, v1
	sltu	v1, sum, v1
	srl	sum, sum, 16
	addu	sum, v1

	/* odd buffer alignment? */
	beqz	t7, 1f
	 nop
	/* swap the two bytes of the folded 16-bit sum */
	sll	v1, sum, 8
	srl	sum, sum, 8
	or	sum, v1
	andi	sum, 0xffff
1:
	.set	reorder
	/* Add the passed partial csum.  */
	ADDC(sum, a2)
	jr	ra
	.set	noreorder

/* ------------------------------------------------------------------------- */

	.align	5
LEAF(csum_partial)
	move	sum, zero
129
	move	t7, zero
A
Atsushi Nemoto 已提交
130 131 132

	sltiu	t8, a1, 0x8
	bnez	t8, small_csumcpy		/* < 8 bytes to copy */
133
	 move	t2, a1
A
Atsushi Nemoto 已提交
134 135

	beqz	a1, out
136
	 andi	t7, src, 0x1			/* odd buffer? */
A
Atsushi Nemoto 已提交
137 138

hword_align:
139
	beqz	t7, word_align
A
Atsushi Nemoto 已提交
140 141
	 andi	t8, src, 0x2

142
	lbu	t0, (src)
A
Atsushi Nemoto 已提交
143 144
	LONG_SUBU	a1, a1, 0x1
#ifdef __MIPSEL__
145
	sll	t0, t0, 8
A
Atsushi Nemoto 已提交
146
#endif
147
	ADDC(sum, t0)
A
Atsushi Nemoto 已提交
148 149 150 151 152 153 154
	PTR_ADDU	src, src, 0x1
	andi	t8, src, 0x2

word_align:
	beqz	t8, dword_align
	 sltiu	t8, a1, 56

155
	lhu	t0, (src)
A
Atsushi Nemoto 已提交
156
	LONG_SUBU	a1, a1, 0x2
157
	ADDC(sum, t0)
A
Atsushi Nemoto 已提交
158 159 160 161 162 163 164 165 166 167 168
	sltiu	t8, a1, 56
	PTR_ADDU	src, src, 0x2

dword_align:
	bnez	t8, do_end_words
	 move	t8, a1

	andi	t8, src, 0x4
	beqz	t8, qword_align
	 andi	t8, src, 0x8

169
	lw	t0, 0x00(src)
A
Atsushi Nemoto 已提交
170
	LONG_SUBU	a1, a1, 0x4
171
	ADDC(sum, t0)
A
Atsushi Nemoto 已提交
172 173 174 175 176 177 178
	PTR_ADDU	src, src, 0x4
	andi	t8, src, 0x8

qword_align:
	beqz	t8, oword_align
	 andi	t8, src, 0x10

179 180
	lw	t0, 0x00(src)
	lw	t1, 0x04(src)
A
Atsushi Nemoto 已提交
181
	LONG_SUBU	a1, a1, 0x8
182 183
	ADDC(sum, t0)
	ADDC(sum, t1)
A
Atsushi Nemoto 已提交
184 185 186 187 188 189 190
	PTR_ADDU	src, src, 0x8
	andi	t8, src, 0x10

oword_align:
	beqz	t8, begin_movement
	 LONG_SRL	t8, a1, 0x7

191 192 193 194 195 196 197 198
	lw	t3, 0x08(src)
	lw	t4, 0x0c(src)
	lw	t0, 0x00(src)
	lw	t1, 0x04(src)
	ADDC(sum, t3)
	ADDC(sum, t4)
	ADDC(sum, t0)
	ADDC(sum, t1)
A
Atsushi Nemoto 已提交
199 200 201 202 203 204
	LONG_SUBU	a1, a1, 0x10
	PTR_ADDU	src, src, 0x10
	LONG_SRL	t8, a1, 0x7

begin_movement:
	beqz	t8, 1f
205
	 andi	t2, a1, 0x40
A
Atsushi Nemoto 已提交
206 207

move_128bytes:
208 209 210 211
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
A
Atsushi Nemoto 已提交
212 213 214 215 216
	LONG_SUBU	t8, t8, 0x01
	bnez	t8, move_128bytes
	 PTR_ADDU	src, src, 0x80

1:
217 218
	beqz	t2, 1f
	 andi	t2, a1, 0x20
A
Atsushi Nemoto 已提交
219 220

move_64bytes:
221 222
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
A
Atsushi Nemoto 已提交
223 224 225
	PTR_ADDU	src, src, 0x40

1:
226
	beqz	t2, do_end_words
A
Atsushi Nemoto 已提交
227 228 229
	 andi	t8, a1, 0x1c

move_32bytes:
230
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
A
Atsushi Nemoto 已提交
231 232 233 234 235 236 237 238
	andi	t8, a1, 0x1c
	PTR_ADDU	src, src, 0x20

do_end_words:
	beqz	t8, maybe_end_cruft
	 LONG_SRL	t8, t8, 0x2

end_words:
239
	lw	t0, (src)
A
Atsushi Nemoto 已提交
240
	LONG_SUBU	t8, t8, 0x1
241
	ADDC(sum, t0)
A
Atsushi Nemoto 已提交
242 243 244 245
	bnez	t8, end_words
	 PTR_ADDU	src, src, 0x4

maybe_end_cruft:
246
	andi	t2, a1, 0x3
A
Atsushi Nemoto 已提交
247 248

small_memcpy:
249
 j small_csumcpy; move a1, t2		/* XXX ??? */
A
Atsushi Nemoto 已提交
250
	beqz	t2, out
251
	 move	a1, t2
A
Atsushi Nemoto 已提交
252 253

end_bytes:
254
	lb	t0, (src)
A
Atsushi Nemoto 已提交
255 256 257 258 259 260 261 262
	LONG_SUBU	a1, a1, 0x1
	bnez	a2, end_bytes
	 PTR_ADDU	src, src, 0x1

out:
	jr	ra
	 move	v0, sum
	END(csum_partial)