udivdi3.S 8.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375
/*
 * udivdi3.S - unsigned long long division
 *
 * Copyright 2003-2007 Analog Devices Inc.
 * Enter bugs at http://blackfin.uclinux.org/
 *
 * Licensed under the GPLv2 or later.
 */

#include <linux/linkage.h>

#define CARRY AC0

#ifdef CONFIG_ARITHMETIC_OPS_L1
.section .l1.text
#else
.text
#endif


ENTRY(___udivdi3)
   R3 = [SP + 12];
   [--SP] = (R7:4, P5:3);

   /* Attempt to use divide primitive first; these will handle
   **  most cases, and they're quick - avoids stalls incurred by
   ** testing for identities.
   */

   R4 = R2 | R3;
   CC = R4 == 0;
   IF CC JUMP .LDIV_BY_ZERO;

   R4.H = 0x8000;
   R4 >>>= 16;                  // R4 now 0xFFFF8000
   R5 = R0 | R2;                // If either dividend or
   R4 = R5 & R4;                // divisor have bits in
   CC = R4;                     // top half or low half's sign
   IF CC JUMP .LIDENTS;          // bit, skip builtins.
   R4 = R1 | R3;                // Also check top halves
   CC = R4;
   IF CC JUMP .LIDENTS;

   /* Can use the builtins. */

   AQ = CC;                     // Clear AQ (CC==0)
   DIVQ(R0, R2);
   DIVQ(R0, R2);
   DIVQ(R0, R2);
   DIVQ(R0, R2);
   DIVQ(R0, R2);
   DIVQ(R0, R2);
   DIVQ(R0, R2);
   DIVQ(R0, R2);
   DIVQ(R0, R2);
   DIVQ(R0, R2);
   DIVQ(R0, R2);
   DIVQ(R0, R2);
   DIVQ(R0, R2);
   DIVQ(R0, R2);
   DIVQ(R0, R2);
   DIVQ(R0, R2);
   DIVQ(R0, R2);
   R0 = R0.L (Z);
   R1 = 0;
   (R7:4, P5:3) = [SP++];
   RTS;

.LIDENTS:
   /* Test for common identities. Value to be returned is
   ** placed in R6,R7.
   */
                                // Check for 0/y, return 0
   R4 = R0 | R1;
   CC = R4 == 0;
   IF CC JUMP .LRETURN_R0;

                                // Check for x/x, return 1
   R6 = R0 - R2;                // If x == y, then both R6 and R7 will be zero
   R7 = R1 - R3;
   R4 = R6 | R7;                // making R4 zero.
   R6 += 1;                     // which would now make R6:R7==1.
   CC = R4 == 0;
   IF CC JUMP .LRETURN_IDENT;

                                // Check for x/1, return x
   R6 = R0;
   R7 = R1;
   CC = R3 == 0;
   IF !CC JUMP .Lnexttest;
   CC = R2 == 1;
   IF CC JUMP .LRETURN_IDENT;

.Lnexttest:
   R4.L = ONES R2;              // check for div by power of two which
   R5.L = ONES R3;              // can be done using a shift
   R6 = PACK (R5.L, R4.L);
   CC = R6 == 1;
   IF CC JUMP .Lpower_of_two_upper_zero;
   R6 = PACK (R4.L, R5.L);
   CC = R6 == 1;
   IF CC JUMP .Lpower_of_two_lower_zero;

                                // Check for x < y, return 0
   R6 = 0;
   R7 = R6;
   CC = R1 < R3 (IU);
   IF CC JUMP .LRETURN_IDENT;
   CC = R1 == R3;
   IF !CC JUMP .Lno_idents;
   CC = R0 < R2 (IU);
   IF CC JUMP .LRETURN_IDENT;

.Lno_idents:                    // Idents don't match. Go for the full operation


   // If X, or X and Y have high bit set, it'll affect the
   // results, so shift right one to stop this. Note: we've already
   // checked that X >= Y, so Y's msb won't be set unless X's
   // is.

   R4 = 0;
   CC = R1 < 0;
   IF !CC JUMP .Lx_msb_clear;
   CC = !CC;                   // 1 -> 0;
   R1 = ROT R1 BY -1;          // Shift X >> 1
   R0 = ROT R0 BY -1;          // lsb -> CC
   BITSET(R4,31);              // to record only x msb was set
   CC = R3 < 0;
   IF !CC JUMP .Ly_msb_clear;
   CC = !CC;
   R3 = ROT R3 BY -1;          // Shift Y >> 1
   R2 = ROT R2 BY -1;
   BITCLR(R4,31);              // clear bit to record only x msb was set

.Ly_msb_clear:
.Lx_msb_clear:
   // Bit 31 in R4 indicates X msb set, but Y msb wasn't, and no bits
   // were lost, so we should shift result left by one.

   [--SP] = R4;                // save for later

   // In the loop that follows, each iteration we add
   // either Y' or -Y' to the Remainder. We compute the
   // negated Y', and store, for convenience. Y' goes
   // into P0:P1, while -Y' goes into P2:P3.

   P0 = R2;
   P1 = R3;
   R2 = -R2;
   CC = CARRY;
   CC = !CC;
   R4 = CC;
   R3 = -R3;
   R3 = R3 - R4;

   R6 = 0;                     // remainder = 0
   R7 = R6;

   [--SP] = R2; P2 = SP;
   [--SP] = R3; P3 = SP;
   [--SP] = R6; P5 = SP;       // AQ = 0
   [--SP] = P1;

   /* In the loop that follows, we use the following
   ** register assignments:
   ** R0,R1 X, workspace
   ** R2,R3 Y, workspace
   ** R4,R5 partial Div
   ** R6,R7 partial remainder
   ** P5 AQ
   ** The remainder and div form a 128-bit number, with
   ** the remainder in the high 64-bits.
   */
   R4 = R0;                    // Div = X'
   R5 = R1;
   R3 = 0;

   P4 = 64;                    // Iterate once per bit
   LSETUP(.LULST,.LULEND) LC0 = P4;
.LULST:
        /* Shift Div and remainder up by one. The bit shifted
        ** out of the top of the quotient is shifted into the bottom
        ** of the remainder.
        */
        CC = R3;
        R4 = ROT R4 BY 1;
        R5 = ROT R5 BY 1 ||        // low q to high q
             R2 = [P5];            // load saved AQ
        R6 = ROT R6 BY 1 ||        // high q to low r
             R0 = [P2];            // load -Y'
        R7 = ROT R7 BY 1 ||        // low r to high r
             R1 = [P3];

                                   // Assume add -Y'
        CC = R2 < 0;               // But if AQ is set...
        IF CC R0 = P0;             // then add Y' instead
        IF CC R1 = P1;

        R6 = R6 + R0;              // Rem += (Y' or -Y')
        CC = CARRY;
        R0 = CC;
        R7 = R7 + R1;
        R7 = R7 + R0 (NS) ||
             R1 = [SP];
                                   // Set the next AQ bit
        R1 = R7 ^ R1;              // from Remainder and Y'
        R1 = R1 >> 31 ||           // Negate AQ's value, and
             [P5] = R1;            // save next AQ
        BITTGL(R1, 0);             // add neg AQ  to the Div
.LULEND: R4 = R4 + R1;

   R6 = [SP + 16];

   R0 = R4;
   R1 = R5;
   CC = BITTST(R6,30);         // Just set CC=0
   R4 = ROT R0 BY 1;           // but if we had to shift X,
   R5 = ROT R1 BY 1;           // and didn't shift any bits out,
   CC = BITTST(R6,31);         // then the result will be half as
   IF CC R0 = R4;              // much as required, so shift left
   IF CC R1 = R5;              // one space.

   SP += 20;
   (R7:4, P5:3) = [SP++];
   RTS;

.Lpower_of_two:
   /* Y has a single bit set, which means it's a power of two.
   ** That means we can perform the division just by shifting
   ** X to the right the appropriate number of bits
   */

   /* signbits returns the number of sign bits, minus one.
   ** 1=>30, 2=>29, ..., 0x40000000=>0. Which means we need
   ** to shift right n-signbits spaces. It also means 0x80000000
   ** is a special case, because that *also* gives a signbits of 0
   */
.Lpower_of_two_lower_zero:
   R7 = 0;
   R6 = R1 >> 31;
   CC = R3 < 0;
   IF CC JUMP .LRETURN_IDENT;

   R2.L = SIGNBITS R3;
   R2 = R2.L (Z);
   R2 += -62;
   (R7:4, P5:3) = [SP++];
   JUMP ___lshftli;

.Lpower_of_two_upper_zero:
   CC = R2 < 0;
   IF CC JUMP .Lmaxint_shift;

   R2.L = SIGNBITS R2;
   R2 = R2.L (Z);
   R2 += -30;
   (R7:4, P5:3) = [SP++];
   JUMP ___lshftli;

.Lmaxint_shift:
   R2 = -31;
   (R7:4, P5:3) = [SP++];
   JUMP ___lshftli;

.LRETURN_IDENT:
   R0 = R6;
   R1 = R7;
.LRETURN_R0:
   (R7:4, P5:3) = [SP++];
   RTS;
.LDIV_BY_ZERO:
   R0 = ~R2;
   R1 = R0;
   (R7:4, P5:3) = [SP++];
   RTS;

ENDPROC(___udivdi3)


ENTRY(___lshftli)
	CC = R2 == 0;
	IF CC JUMP .Lfinished;	// nothing to do
	CC = R2 < 0;
	IF CC JUMP .Lrshift;
	R3 = 64;
	CC = R2 < R3;
	IF !CC JUMP .Lretzero;

	// We're shifting left, and it's less than 64 bits, so
	// a valid result will be returned.

	R3 >>= 1;	// R3 now 32
	CC = R2 < R3;

	IF !CC JUMP .Lzerohalf;

	// We're shifting left, between 1 and 31 bits, which means
	// some of the low half will be shifted into the high half.
	// Work out how much.

	R3 = R3 - R2;

	// Save that much data from the bottom half.

	P1 = R7;
	R7 = R0;
	R7 >>= R3;

	// Adjust both parts of the parameter.

	R0 <<= R2;
	R1 <<= R2;

	// And include the bits moved across.

	R1 = R1 | R7;
	R7 = P1;
	RTS;

.Lzerohalf:
	// We're shifting left, between 32 and 63 bits, so the
	// bottom half will become zero, and the top half will
	// lose some bits. How many?

	R2 = R2 - R3;	// N - 32
	R1 = LSHIFT R0 BY R2.L;
	R0 = R0 - R0;
	RTS;

.Lretzero:
	R0 = R0 - R0;
	R1 = R0;
.Lfinished:
	RTS;

.Lrshift:
	// We're shifting right, but by how much?
	R2 = -R2;
	R3 = 64;
	CC = R2 < R3;
	IF !CC JUMP .Lretzero;

	// Shifting right less than 64 bits, so some result bits will
	// be retained.

	R3 >>= 1;	// R3 now 32
	CC = R2 < R3;
	IF !CC JUMP .Lsignhalf;

	// Shifting right between 1 and 31 bits, so need to copy
	// data across words.

	P1 = R7;
	R3 = R3 - R2;
	R7 = R1;
	R7 <<= R3;
	R1 >>= R2;
	R0 >>= R2;
	R0 = R7 | R0;
	R7 = P1;
	RTS;

.Lsignhalf:
	// Shifting right between 32 and 63 bits, so the top half
	// will become all zero-bits, and the bottom half is some
	// of the top half. But how much?

	R2 = R2 - R3;
	R0 = R1;
	R0 >>= R2;
	R1 = 0;
	RTS;

ENDPROC(___lshftli)