/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */


#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>

/*
 * ARM_DIV_BODY - unsigned shift-and-subtract division core.
 *
 *   dividend: register holding the numerator; reduced by the subtractions
 *   divisor:  register holding the denominator; shifted in place
 *   result:   register that receives the quotient
 *   curbit:   scratch register holding the quotient bit currently tried
 *
 * All four registers are modified.  The callers in this file reach the
 * macro only after they have ruled out a zero divisor, a divisor that is
 * not smaller than the dividend, and a power-of-two divisor.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ shift = clz(divisor) - clz(dividend); move the divisor and a
	@ single quotient bit left by that amount, then clear the result.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4 bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop: four compare/subtract steps per pass, against the
	@ divisor shifted right by 0, 1, 2 and 3.  Each successful
	@ subtraction sets the matching bit of curbit in the result.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movsne	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4	@ Yes, step down one nibble
	bne	1b

.endm


/*
 * ARM_DIV2_ORDER - compute the shift count for a power-of-two divisor.
 *
 *   divisor: register holding the divisor; on the pre-ARMv5 path it is
 *            shifted right in place, so its value is lost
 *   order:   register that receives the bit position of the divisor
 *
 * Both callers in this file use the result as a right-shift amount and
 * invoke the macro only after "tst divisor, divisor - 1" reported a
 * power of two.  The pre-ARMv5 path relies on that: its last step gives
 * the right answer only when the reduced divisor is 1, 2, 4 or 8.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor		@ count of leading zero bits
	rsb	\order, \order, #31		@ order = 31 - clz

#else

	@ Binary search: drop 16, then 8, then 4 low bits while the
	@ divisor is still at least that large, adding to the order.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ The divisor is now below 16: above 4 adds 3, otherwise
	@ divisor >> 1 adds 0, 1 or 2 for the values 1, 2 and 4.
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm


/*
 * ARM_MOD_BODY - unsigned shift-and-subtract remainder core.
 *
 *   dividend: register holding the numerator; the callers in this file
 *             return it as the remainder once the macro has run
 *   divisor:  register holding the denominator; shifted in place
 *   order:    scratch register counting how far the divisor was shifted
 *   spare:    scratch register, written only on the ARMv5+ path
 *
 * Local labels 1 to 5 are used inside the macro.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	@ order = clz(divisor) - clz(dividend); shift the divisor left
	@ by that amount.
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f				@ fewer than 4 steps in total

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1			@ ge only while dividend >= 1
	mov	\divisor, \divisor, lsr #4	@ mov leaves the flags alone
	subsge	\order, \order, #4		@ another full batch left?
	bge	1b

	@ Finished if no partial batch is pending or the dividend is 0.
	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
2:	cmn	\order, #2			@ compare order with -2
	blt	4f				@ below -2: one step
	beq	3f				@ equal to -2: two steps
	cmp	\dividend, \divisor		@ otherwise: three steps
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm


/*
 * __udivsi3 / __aeabi_uidiv - unsigned 32-bit division.
 *
 *   In:      r0 = dividend, r1 = divisor
 *   Out:     r0 = quotient
 *   Scratch: r2, r3; r1 is modified on the general and power-of-two paths
 *
 * A zero divisor branches to Ldiv0.  With CONFIG_ARM_PATCH_IDIV the
 * entry point is aligned with ".align 3".
 */
#ifdef CONFIG_ARM_PATCH_IDIV
	.align	3
#endif

ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ r2 = divisor - 1
	reteq	lr				@ divisor is 1: r0 is the answer
	bcc	Ldiv0				@ borrow: divisor was 0
	cmp	r0, r1
	bls	11f				@ dividend <= divisor
	tst	r1, r2				@ divisor & (divisor - 1)
	beq	12f				@ zero: divisor is a power of 2

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2				@ quotient was built in r2
	ret	lr

	@ dividend <= divisor, flags still from "cmp r0, r1"
11:	moveq	r0, #1				@ equal: quotient is 1
	movne	r0, #0				@ smaller: quotient is 0
	ret	lr

	@ power-of-two divisor: shift right by its order
12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	ret	lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)

/*
 * __umodsi3 - unsigned 32-bit remainder.
 *
 *   In:      r0 = dividend, r1 = divisor
 *   Out:     r0 = dividend modulo divisor
 *   Scratch: r2, r3; r1 is modified on the general path
 *
 * A zero divisor branches to Ldiv0.
 */
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0				@ borrow: divisor was 0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0				@ divisor is 1 or equals dividend
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2			@ power of 2: mask with divisor - 1
	retls	lr				@ done unless the general case

	ARM_MOD_BODY r0, r1, r2, r3

	ret	lr

UNWIND(.fnend)
ENDPROC(__umodsi3)

/*
 * __divsi3 / __aeabi_idiv - signed 32-bit division.
 *
 *   In:      r0 = dividend, r1 = divisor
 *   Out:     r0 = quotient
 *   Scratch: r1, r2, r3, ip
 *
 * ip keeps dividend XOR divisor, so its top bit is the sign of the
 * quotient.  The magnitudes are divided as unsigned values and the sign
 * is applied at the end.  A zero divisor branches to Ldiv0.  With
 * CONFIG_ARM_PATCH_IDIV the entry point is aligned with ".align 3".
 */
#ifdef CONFIG_ARM_PATCH_IDIV
	.align 3
#endif

ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f				@ |dividend| <= |divisor|
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0			@ negate if the signs differed
	ret	lr

	@ divisor is 1 or -1; ip XOR r0 gives back the original divisor
10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	ret	lr

	@ |dividend| <= |divisor|, flags still from "cmp r3, r1"
11:	movlo	r0, #0				@ smaller: quotient is 0
	moveq	r0, ip, asr #31			@ equal: 0 or -1 from the sign
	orreq	r0, r0, #1			@ ... which becomes 1 or -1
	ret	lr

	@ power-of-two divisor: shift the magnitude, then apply the sign
12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)

/*
 * __modsi3 - signed 32-bit remainder.
 *
 *   In:      r0 = dividend, r1 = divisor
 *   Out:     r0 = remainder, negated when the dividend was negative
 *   Scratch: r1, r2, r3, ip
 *
 * ip keeps the original dividend so its sign can be applied to the
 * result.  A zero divisor branches to Ldiv0.
 */
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0				@ divisor is 1 or equals dividend
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2			@ power of 2: mask with divisor - 1
	bls	10f				@ skip the general case

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0			@ give the result the dividend sign
	ret	lr

UNWIND(.fnend)
ENDPROC(__modsi3)

#ifdef CONFIG_AEABI

/*
 * __aeabi_uidivmod - unsigned 32-bit division with remainder.
 *
 *   In:      r0 = dividend, r1 = divisor
 *   Out:     r0 = quotient (from __aeabi_uidiv), r1 = remainder
 *   Scratch: r2, r3
 *
 * The remainder is computed as dividend - quotient * divisor.
 */
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}		@ keep the operands and lr
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2			@ r3 = quotient * divisor
	sub	r1, r1, r3			@ r1 = remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)

/*
 * __aeabi_idivmod - signed 32-bit division with remainder.
 *
 *   In:      r0 = dividend, r1 = divisor
 *   Out:     r0 = quotient (from __aeabi_idiv), r1 = remainder
 *   Scratch: r2, r3
 *
 * The remainder is computed as dividend - quotient * divisor.
 */
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)
	stmfd	sp!, {r0, r1, ip, lr}		@ keep the operands and lr
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2			@ r3 = quotient * divisor
	sub	r1, r1, r3			@ r1 = remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)

#endif
L
Linus Torvalds 已提交
361 362

Ldiv0:
363 364 365
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
366
	str	lr, [sp, #-8]!
L
Linus Torvalds 已提交
367 368
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
369
	ldr	pc, [sp], #8
370 371
UNWIND(.fnend)
ENDPROC(Ldiv0)