/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 * $Id$
 *
 * Copyright (C) 2008 by Jens Arnold
 * Copyright (C) 2009 by Andrew Mahone
 *
 * Optimised replacements for libgcc functions
 *
 * Based on: libgcc routines for ARM cpu, additional algorithms from ARM System
 *           Developer's Guide
 * Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
 * Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
 * Free Software Foundation, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/

/* ARM_ARCH, tested below, is not defined in this file; presumably it comes
   from config.h - TODO confirm. */
#include <config.h>

/*
 * ARM_SDIV32_PRE - sign pre-processing for the signed ARMv4 divider.
 *
 *   numerator  in/out  replaced by (0 - numerator) when its sign bit is set
 *   divisor    in/out  replaced by (0 - divisor) when its sign bit is CLEAR,
 *                      so a non-zero divisor always leaves here negative
 *   sign       out     bit 31 = divisor sign XOR numerator sign,
 *                      bits 0..30 = numerator sign
 *
 * Flags are clobbered. Pair with ARM_SDIV32_POST, which consumes \sign.
 */
.macro ARM_SDIV32_PRE numerator, divisor, sign
    /* sign[31] = divisor sign */
    ands    \sign, \divisor, #1<<31
    /* Z is set when the divisor sign bit was clear: negate the divisor */
    rsbeq   \divisor, \divisor, #0
    /* sign[31] = result sign, sign[0:30], C = numerator sign */
    eors    \sign, \sign, \numerator, asr #32
    rsbcs   \numerator, \numerator, #0
.endm

/*
 * ARM_SDIV32_POST - apply the signs recorded in \sign to unsigned results.
 *
 *   quotient   in/out  negated when sign[31] is set; pass "" to skip
 *   remainder  in/out  negated when sign[30] is set; pass "" to skip
 *   sign       in      layout as produced by the signed pre-processing:
 *                      bit 31 = result sign, bits 0..30 = numerator sign
 *
 * \sign is shifted left by one and the flags are clobbered.
 */
.macro ARM_SDIV32_POST quotient, remainder, sign
    /* C = old sign[31] (result sign), N = old sign[30] (numerator sign) */
    movs    \sign, \sign, lsl #1
.ifnc "", "\quotient"
    rsbcs   \quotient, \quotient, #0
.endif
.ifnc "", "\remainder"
    rsbmi   \remainder, \remainder, #0
.endif
.endm

#if ARM_ARCH < 5
/*
 * ARMV4_UDIV32_BODY - unsigned 32-bit divide/modulo body that does not use
 * clz.
 *
 *   numerator  in      dividend; clobbered
 *   divisor    in      divisor; negated in place on entry and clobbered
 *   quotient   out     register for the quotient, or "" if not wanted
 *   remainder  out     register for the remainder, or "" if not wanted
 *   tmp, bits          scratch registers
 *   div0label          if not "", branched to when the divisor is zero
 *   return             non-zero: every exit is "bx lr";
 *                      zero: every exit reaches local label 99 at the end
 *                      of the expansion
 *
 * Uses local labels 10, 20 and (when \return is zero) 99, and the assembler
 * symbol "shift". Flags are clobbered.
 */
.macro ARMV4_UDIV32_BODY numerator, divisor, quotient, remainder, tmp, bits, div0label, return
    /* The divider adds the negated divisor instead of subtracting. */
.ifnc "", "\div0label"
    rsbs    \divisor, \divisor, #0
    beq     \div0label
.else
    rsb     \divisor, \divisor, #0
.endif
    /* This SWAR divider requires a numerator less than 1<<31, because it must
       be able to shift the remainder left at each step without shifting out
       topmost bit. Since a shift might be needed for the aligned remainder to
       exceed the divisor, the topmost bit must be unset at the start to avoid
       this overflow case. The original numerator is saved so that the result
       can be corrected after the reduced division completes. */
    cmn     \numerator, \divisor
    /* Carry clear here means numerator < divisor. */
.ifc "", "\quotient"
.ifc "\numerator", "\remainder"
    /* Remainder only, produced in the numerator register: it already holds
       the result, so leave straight away. */
.if \return
    bxcc    lr
.else
    /* Must be conditional, like the bxcc above; an unconditional branch
       would skip the division for every input. */
    bcc     99f
.endif
.else
    bcc     20f
.endif
.else
    bcc     20f
.endif
    /* Keep the original numerator in tmp; halve the working copy if its top
       bit is set. */
    movs    \tmp, \numerator
    movmi   \numerator, \numerator, lsr #1
    mov     \bits, #30
    /* Align the divisor with the numerator, trying shifts of 16, 8, 4, 2, 1;
       bits is reduced by the total shift applied. */
.set shift, 16
.rept 5
    cmn     \divisor, \numerator, lsr #shift
    subcs   \bits, \bits, #shift
    movcs   \divisor, \divisor, lsl #shift
.set shift, shift >> 1
.endr
    adds    \numerator, \numerator, \divisor
    subcc   \numerator, \numerator, \divisor
    /* pc reads as the address of the first unrolled step below; each step is
       two instructions (8 bytes), so this skips "bits" steps. */
    add     pc, pc, \bits, lsl #3
    nop
.rept 30
    adcs    \numerator, \divisor, \numerator, lsl #1
    subcc   \numerator, \numerator, \divisor
.endr
    adc     \numerator, \numerator, \numerator
    /* N = top bit of the original numerator, C = its lowest bit */
    movs    \tmp, \tmp, asr #1
    rsb     \bits, \bits, #31
    /* Original numerator had its top bit set: fix up at 10 */
    bmi     10f
    /* numerator now holds the remainder above bit "bits" and the quotient
       below it. */
.ifc "", "\quotient"
    mov     \remainder, \numerator, lsr \bits
.else
.ifc "", "\remainder"
    mov     \divisor, \numerator, lsr \bits
    eor     \quotient, \numerator, \divisor, lsl \bits
.else
    mov     \remainder, \numerator, lsr \bits
    eor     \quotient, \numerator, \remainder, lsl \bits
.endif
.endif
.ifne \return
    bx      lr
.else
    b       99f
.endif
10:
    /* Correction for a numerator that was halved before dividing. The carry
       consumed by the adc below is still the low bit shifted out of tmp by
       the movs above; nothing in between writes the flags. */
    mov     \tmp, \numerator, lsr \bits
    eor     \numerator, \numerator, \tmp, lsl \bits
    sub     \bits, \bits, #1
    adc     \tmp, \tmp, \tmp
    adds    \tmp, \tmp, \divisor, asr \bits
.ifnc "", "\quotient"
    adc     \quotient, \numerator, \numerator
.endif
.ifnc "", "\remainder"
    subcc   \remainder, \tmp, \divisor, asr \bits
    movcs   \remainder, \tmp
.endif
.ifne \return
    bx      lr
.else
    b       99f
.endif
20:
    /* numerator < divisor: quotient is zero, remainder is the numerator */
.ifnc "", "\remainder"
.ifnc "\remainder", "\numerator"
    mov     \remainder, \numerator
.endif
.endif
.ifnc "", "\quotient"
    mov     \quotient, #0
.endif
.ifne \return
    bx      lr
.else
99:
.endif
.endm

/*
 * ARMV4_SDIV32_BODY - signed 32-bit divide/modulo body that does not use
 * clz.
 *
 *   numerator  in      dividend; clobbered
 *   divisor    in      divisor; clobbered
 *   quotient   out     register for the quotient, or "" if not wanted
 *   remainder  out     register for the remainder, or "" if not wanted
 *   bits, sign         scratch registers
 *   div0label          if not "", branched to when the divisor is zero
 *   return             non-zero: every exit is "bx lr";
 *                      zero: every exit reaches local label 99 near the end
 *                      of the expansion, where the signs are applied
 *
 * Uses local labels 20 and (when \return is zero) 99, and the assembler
 * symbol "shift". Flags are clobbered.
 */
.macro ARMV4_SDIV32_BODY numerator, divisor, quotient, remainder, bits, sign, div0label, return
    /* When this is wrapped for signed division, the wrapper code will handle
       inverting the divisor, and also the zero divisor test. */
    ARM_SDIV32_PRE \numerator, \divisor, \sign
.ifnc "", "\div0label"
    tst     \divisor, \divisor
    beq     \div0label
.endif
    /* This SWAR divider requires a numerator less than 1<<31, because it must
       be able to shift the remainder left at each step without shifting out
       topmost bit. With signed inputs, whose absolute value may not exceed
       1<<31, this may be accomplished simply by subtracting the divisor before
       beginning division, and adding 1 to the quotient. */
    adds    \numerator, \numerator, \divisor
    /* Carry clear: |numerator| < |divisor| */
    bcc     20f
    mov     \bits, #30
    /* Align the divisor with the numerator, trying shifts of 16, 8, 4, 2, 1;
       bits is reduced by the total shift applied. */
.set shift, 16
.rept 5
    cmn     \divisor, \numerator, lsr #shift
    subcs   \bits, \bits, #shift
    movcs   \divisor, \divisor, lsl #shift
.set shift, shift >> 1
.endr
    adds    \numerator, \numerator, \divisor
    subcc   \numerator, \numerator, \divisor
    /* pc reads as the address of the first unrolled step below; each step is
       two instructions (8 bytes), so this skips "bits" steps. */
    add     pc, pc, \bits, lsl #3
    nop
.rept 30
    adcs    \numerator, \divisor, \numerator, lsl #1
    subcc   \numerator, \numerator, \divisor
.endr
    /* rsb leaves the flags alone, so the adc still sees the last step's carry */
    rsb     \bits, \bits, #31
    adc     \numerator, \numerator, \numerator
    /* numerator now holds the remainder above bit "bits" and the quotient
       below it; the "add #1" accounts for the divisor subtracted up front. */
.ifc "", "\quotient"
    mov     \remainder, \numerator, lsr \bits
.else
.ifc "", "\remainder"
    mov     \divisor, \numerator, lsr \bits
    add     \numerator, \numerator, #1
    sub     \quotient, \numerator, \divisor, lsl \bits
.else
    mov     \remainder, \numerator, lsr \bits
    add     \numerator, \numerator, #1
    sub     \quotient, \numerator, \remainder, lsl \bits
.endif
.endif
.ifne \return
    ARM_SDIV32_POST \quotient, \remainder, \sign
    bx      lr
.else
    b       99f
.endif
20:
    /* |numerator| < |divisor|: undo the initial subtraction to recover the
       remainder; the quotient is zero. */
.ifnc "", "\remainder"
    sub     \remainder, \numerator, \divisor
.endif
.ifnc "", "\quotient"
    mov     \quotient, #0
.endif
.ifne \return
    ARM_SDIV32_POST "", \remainder, \sign
    bx      lr
.else
99:
    ARM_SDIV32_POST \quotient, \remainder, \sign
.endif
.endm

#else
/*
 * ARMV5_UDIV32_BODY - unsigned 32-bit divide/modulo body using clz and a
 * table-seeded reciprocal (.L_udiv_est_table).
 *
 *   numerator  in      dividend; clobbered
 *   divisor    in      divisor; clobbered
 *   quotient   out     register for the quotient, or "" if not wanted
 *   remainder  out     register for the remainder, or "" if not wanted
 *   bits, inv, neg     scratch registers
 *   div0label          if not "", branched to when the divisor is zero
 *   return             non-zero: every exit is "bx lr";
 *                      zero: every exit reaches local label 99 at the end
 *                      of the expansion
 *
 * Uses local labels 10, 20, 30, 40 (ARMv6 only) and 99. Flags are clobbered.
 * The instructions between the "add ..., pc, ..." and the table load must not
 * be reordered: the load offset is computed relative to the load itself and
 * relies on the load sitting exactly where that pc value points.
 */
.macro ARMV5_UDIV32_BODY numerator, divisor, quotient, remainder, bits, inv, neg, div0label, return
    cmp     \numerator, \divisor
    clz     \bits, \divisor
    bcc     30f
    /* Normalise the divisor so that its top set bit is bit 31 */
    mov     \inv, \divisor, lsl \bits
    add     \neg, pc, \inv, lsr #25
    /* Test whether divisor is 2^N */
    cmp     \inv, #1<<31
    /* Load approximate reciprocal */
    ldrhib  \inv, [\neg, #.L_udiv_est_table-.-64]
    bls     20f
    subs    \bits, \bits, #7
    rsb     \neg, \divisor, #0
    /* Scale approximate reciprocal, or else branch to large-divisor path */
    movpl   \divisor, \inv, lsl \bits
    bmi     10f
    /* Newton-Raphson iteration to improve reciprocal accuracy */
    mul     \inv, \divisor, \neg
    smlawt  \divisor, \divisor, \inv, \divisor
    mul     \inv, \divisor, \neg
    /* Complete N-R math and produce approximate quotient. Use smmla/smmul on
       ARMv6. */
#if ARM_ARCH >= 6
    tst     \numerator, \numerator
    smmla   \divisor, \divisor, \inv, \divisor
    /* Branch to large-numerator handler, or else use smmul if sign bit is not
       set. This wins on average with random numerators, and should be no
       slower than using umull for small numerator, even if prediction fails.
    */
    bmi     40f
    smmul   \inv, \numerator, \divisor
#else
    /* ARMv5e lacks smmul, so always uses umull. */
    mov     \bits, #0
    smlal   \bits, \divisor, \inv, \divisor
    umull   \bits, \inv, \numerator, \divisor
#endif
    /* Calculate remainder and correct result. */
    add     \numerator, \numerator, \neg
.ifnc "", "\remainder"
    mla     \remainder, \inv, \neg, \numerator
.ifnc "", "\quotient"
    mov     \quotient, \inv
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
.else
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
.endif
.else
    mla     \divisor, \inv, \neg, \numerator
    mov     \quotient, \inv
    cmn     \divisor, \neg
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
.endif
.if \return
    bx      lr
.else
    b       99f
.endif
10:
    /* Very large divisors can be handled without further improving the
       reciprocal. First the reciprocal must be reduced to ensure that it
       underestimates the correct value. */
    rsb     \bits, \bits, #0
    sub     \inv, \inv, #4
    mov     \divisor, \inv, lsr \bits
    /* Calculate approximate quotient and remainder */
    umull   \bits, \inv, \numerator, \divisor
    /* Correct quotient and remainder */
.ifnc "", "\remainder"
    mla     \remainder, \inv, \neg, \numerator
.ifnc "", "\quotient"
    mov     \quotient, \inv
    cmn     \neg, \remainder, lsr #1
    addcs   \remainder, \remainder, \neg, lsl #1
    addcs   \quotient, \quotient, #2
    cmn     \neg, \remainder
    addcs   \remainder, \remainder, \neg
    addcs   \quotient, \quotient, #1
.else
    cmn     \neg, \remainder, lsr #1
    addcs   \remainder, \remainder, \neg, lsl #1
    cmn     \neg, \remainder
    addcs   \remainder, \remainder, \neg
.endif
.else
    mla     \divisor, \inv, \neg, \numerator
    mov     \quotient, \inv
    cmn     \neg, \divisor, lsr #1
    addcs   \divisor, \divisor, \neg, lsl #1
    addcs   \quotient, \quotient, #2
    cmn     \neg, \divisor
    addcs   \quotient, \quotient, #1
.endif
.if \return
    bx      lr
.else
    b       99f
.endif
20:
    /* Handle division by powers of two by shifting right. Mod is handled
       by using divisor-1 as a bitmask. The flags still come from the
       "cmp inv, #1<<31" above: NE here means the normalised divisor was
       below 1<<31, which only happens for a zero divisor. */
.ifnc "", "\remainder"
.ifnc "", "\div0label"
    bne     \div0label
.endif
.ifnc "", "\quotient"
    sub     \divisor, \divisor, #1
    rsb     \bits, \bits, #31
    and     \remainder, \numerator, \divisor
    mov     \quotient, \numerator, lsr \bits
.else
    sub     \divisor, \divisor, #1
    and     \remainder, \numerator, \divisor
.endif
.else
    rsb     \bits, \bits, #31
.ifnc "", "\div0label"
    bne     \div0label
.endif
    mov     \quotient, \numerator, lsr \bits
.endif
.if \return
    bx      lr
.else
    b       99f
.endif
30:
    /* Handle numerator < divisor - quotient is zero, remainder is the
       numerator, which has not been modified on this path. */
.ifnc "", "\remainder"
    mov     \remainder, \numerator
.endif
.ifnc "", "\quotient"
    mov     \quotient, #0
.endif
.if \return
    bx      lr
#if ARM_ARCH >= 6
.else
    /* Do not fall through into the large-numerator handler below. */
    b       99f
#endif
.endif
#if ARM_ARCH >= 6
40:
    /* Handle large (sign bit set) numerators. Works exactly as the ARMv5e code
       above 10:. */
    umull   \bits, \inv, \numerator, \divisor
    add     \numerator, \numerator, \neg
.ifnc "", "\remainder"
    mla     \remainder, \inv, \neg, \numerator
.ifnc "", "\quotient"
    mov     \quotient, \inv
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
.else
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
.endif
.else
    mla     \divisor, \inv, \neg, \numerator
    mov     \quotient, \inv
    cmn     \divisor, \neg
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
.endif
.if \return
    bx      lr
.else
    b       99f
.endif
#endif
99:
.endm

/*
 * ARMV5_SDIV32_BODY - signed 32-bit divide/modulo body using clz and a
 * table-seeded reciprocal (.L_udiv_est_table).
 *
 *   numerator  in      dividend; clobbered
 *   divisor    in      divisor; clobbered
 *   quotient   out     register for the quotient, or "" if not wanted
 *   remainder  out     register for the remainder, or "" if not wanted
 *   bits, inv, neg, sign   scratch registers
 *   div0label          if not "", branched to when the divisor is zero
 *   return             unlike the unsigned bodies this is not a flag but the
 *                      text of the return instruction, emitted verbatim at
 *                      every exit; when "" the exits reach local label 99 at
 *                      the end of the expansion instead
 *
 * Uses local labels 10, 20, 30 and 99. Flags are clobbered.
 * The instructions between the "add ..., pc, ..." and the table load must not
 * be reordered: the load offset is computed relative to the load itself and
 * relies on the load sitting exactly where that pc value points.
 */
.macro ARMV5_SDIV32_BODY numerator, divisor, quotient, remainder, bits, inv, neg, sign, div0label, return
    /* sign[31] = divisor sign */
    ands    \sign, \divisor, #1<<31
    rsbne   \divisor, \divisor, #0
    /* sign[31] = result sign, sign[0:30], C = numerator sign */
    eors    \sign, \sign, \numerator, asr #32
    clz     \bits, \divisor
    rsbcs   \numerator, \numerator, #0
    /* On ARMv6, subtract divisor before performing division, which ensures
       numerator sign bit is clear and smmul may be used in place of umull. The
       fixup for the results can be fit entirely into existing delay slots on
       the main division paths. It costs 1c in the num<div path if the
       remainder is to be produced in the numerator's register, and 1c in
       the power-of-2-divisor path only if producing both remainder and
       quotient. */
#if ARM_ARCH >= 6
    subs    \numerator, \numerator, \divisor
#else
    cmp     \numerator, \divisor
#endif
    /* Normalise the divisor so that its top set bit is bit 31 */
    movcs   \inv, \divisor, lsl \bits
    bcc     30f
    /* Test whether divisor is 2^N */
    cmp     \inv, #1<<31
    add     \inv, pc, \inv, lsr #25
    bls     20f
    /* Load approximate reciprocal */
    ldrb    \inv, [\inv, #.L_udiv_est_table-.-64]
    subs    \bits, \bits, #7
    rsb     \neg, \divisor, #0
    /* Scale approximate reciprocal, or else branch to large-divisor path */
    movpl   \divisor, \inv, lsl \bits
    bmi     10f
    /* Newton-Raphson iteration to improve reciprocal accuracy */
    mul     \inv, \divisor, \neg
    smlawt  \divisor, \divisor, \inv, \divisor
    mul     \inv, \divisor, \neg
    /* Complete N-R math and produce approximate quotient. Use smmla/smmul on
       ARMv6. */
#if ARM_ARCH >= 6
    smmla   \divisor, \divisor, \inv, \divisor
    smmul   \inv, \numerator, \divisor
#else
    mov     \bits, #0
    smlal   \bits, \divisor, \inv, \divisor
    umull   \bits, \inv, \numerator, \divisor
#endif
    /* Calculate remainder and correct quotient. On ARMv6 the quotient starts
       one higher to account for the divisor subtracted up front. */
    add     \numerator, \numerator, \neg
.ifnc "", "\remainder"
    mla     \remainder, \inv, \neg, \numerator
.ifnc "", "\quotient"
#if ARM_ARCH >= 6
    add     \quotient, \inv, #1
#else
    mov     \quotient, \inv
#endif
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
.else
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
.endif
.else
    mla     \divisor, \inv, \neg, \numerator
#if ARM_ARCH >= 6
    add     \quotient, \inv, #1
#else
    mov     \quotient, \inv
#endif
    cmn     \divisor, \neg
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
.endif
    ARM_SDIV32_POST \quotient, \remainder, \sign
.ifnc "", "\return"
    \return
.else
    b       99f
.endif
10:
    /* Very large divisors can be handled without further improving the
       reciprocal. First the reciprocal must be reduced to ensure that it
       underestimates the correct value. */
    rsb     \bits, \bits, #0
    sub     \inv, \inv, #4
    mov     \divisor, \inv, lsr \bits
    /* Calculate approximate quotient and remainder */
#if ARM_ARCH >= 6
    smmul   \inv, \numerator, \divisor
#else
    umull   \bits, \inv, \numerator, \divisor
#endif
    /* Correct quotient and remainder */
.ifnc "", "\remainder"
    mla     \remainder, \inv, \neg, \numerator
.ifnc "", "\quotient"
#if ARM_ARCH >= 6
    add     \quotient, \inv, #1
#else
    mov     \quotient, \inv
#endif
    cmn     \neg, \remainder, lsr #1
    addcs   \remainder, \remainder, \neg, lsl #1
    addcs   \quotient, \quotient, #2
    cmn     \neg, \remainder
    addcs   \remainder, \remainder, \neg
    addcs   \quotient, \quotient, #1
.else
    cmn     \neg, \remainder, lsr #1
    addcs   \remainder, \remainder, \neg, lsl #1
    cmn     \neg, \remainder
    addcs   \remainder, \remainder, \neg
.endif
.else
    mla     \divisor, \inv, \neg, \numerator
#if ARM_ARCH >= 6
    add     \quotient, \inv, #1
#else
    mov     \quotient, \inv
#endif
    cmn     \neg, \divisor, lsr #1
    addcs   \divisor, \divisor, \neg, lsl #1
    addcs   \quotient, \quotient, #2
    cmn     \neg, \divisor
    addcs   \quotient, \quotient, #1
.endif
    ARM_SDIV32_POST \quotient, \remainder, \sign
.ifnc "", "\return"
    \return
.else
    b       99f
.endif
20:
    /* Handle division by powers of two by shifting right. Mod is handled
       by using divisor-1 as a bitmask. The flags still come from the
       "cmp inv, #1<<31" above: NE here means the normalised divisor was
       below 1<<31, which only happens for a zero divisor. */
.ifnc "", "\div0label"
    bne     \div0label
.endif
.ifnc "", "\remainder"
.ifnc "", "\quotient"
    rsb     \bits, \bits, #31
#if ARM_ARCH >= 6
    /* Undo the up-front subtraction before shifting out the quotient */
    add     \numerator, \numerator, \divisor
#endif
    sub     \divisor, \divisor, #1
    and     \remainder, \numerator, \divisor
    mov     \quotient, \numerator, lsr \bits
.else
    sub     \divisor, \divisor, #1
    and     \remainder, \numerator, \divisor
.endif
.else
    rsb     \bits, \bits, #31
#if ARM_ARCH >= 6
    /* Undo the up-front subtraction before shifting out the quotient */
    add     \numerator, \numerator, \divisor
#endif
    mov     \quotient, \numerator, lsr \bits
.endif
    ARM_SDIV32_POST \quotient, \remainder, \sign
.ifnc "", "\return"
    \return
.else
    b       99f
.endif
30:
    /* Handle numerator < divisor - quotient is zero, remainder is numerator,
       which must be restored to its original value on ARMv6. */
.ifnc "", "\remainder"
#if ARM_ARCH >= 6
    add     \remainder, \numerator, \divisor
#else
.ifnc "\remainder", "\numerator"
    mov     \remainder, \numerator
.endif
#endif
.endif
.ifnc "", "\quotient"
    mov     \quotient, #0
.endif
.ifnc "", "\remainder"
    ARM_SDIV32_POST "", \remainder, \sign
.endif
.ifnc "", "\return"
    \return
.endif
99:
.endm
#endif

    .section .text

/* Divide-by-zero trampoline passed as div0label by the ARMv5 signed entry
   points in this file. Those entry points store lr at [sp, #-4] without moving
   sp, so sp is lowered here to cover that slot before branching to __div0
   (not defined in this file). */
__div0_wrap_s:
    sub     sp, sp, #4
    b       __div0
    .size __div0_wrap_s, . - __div0_wrap_s

/* Divide-by-zero trampoline passed as div0label by the entry points that
   have not saved lr themselves: pushes lr, then branches to __div0 (not
   defined in this file). */
__div0_wrap:
    str     lr, [sp, #-4]!
    b       __div0
    .size __div0_wrap, . - __div0_wrap

/* Exported symbols: the libgcc names without EABI, the __aeabi_* names with
   it. */
#ifndef __ARM_EABI__
    .global __divsi3
    .type   __divsi3,%function
    .global __udivsi3
    .type   __udivsi3,%function
#else
/* The div+mod averages a fraction of a cycle worse for signed values, and
   slightly better for unsigned, so just alias div to divmod. */
    .global __aeabi_uidivmod
    .type   __aeabi_uidivmod,%function
    .global __aeabi_uidiv
    .type   __aeabi_uidiv,%function
    .set    __aeabi_uidiv,__aeabi_uidivmod
    .global __aeabi_idivmod
    .type   __aeabi_idivmod,%function
    .global __aeabi_idiv
    .type   __aeabi_idiv,%function
    .set    __aeabi_idiv,__aeabi_idivmod
#endif


#if ARM_ARCH < 5
    .global __clzsi2
    .type   __clzsi2, %function

/* Count leading zeros of r0, result in r0 (32 for an input of zero).
   Only r0 and the flags-free data path are used; no other register is
   written. */
__clzsi2:
    /* Smear every set bit down over the 15 positions below it */
    orr     r0, r0, r0, lsr #8
    orr     r0, r0, r0, lsr #4
    orr     r0, r0, r0, lsr #2
    orr     r0, r0, r0, lsr #1
    /* Drop everything 16 or more positions below the top set bit, leaving a
       pattern that depends only on that bit's position */
    bic     r0, r0, r0, lsr #16
    /* Multiply by (2^14 - 1) * (2^11 - 1) * (2^9 - 1); the top six bits of
       the product index the table */
    rsb     r0, r0, r0, lsl #14
    rsb     r0, r0, r0, lsl #11
    rsb     r0, r0, r0, lsl #9
    /* pc reads as the address of the table, which must therefore start
       directly after the bx below */
    ldrb    r0, [pc, r0, lsr #26]
    bx      lr
    .byte   32, 20, 19,  0,  0, 18,  0,  7, 10, 17,  0,  0, 14,  0,  6,  0
    .byte    0,  9,  0, 16,  0,  0,  1, 26,  0, 13,  0,  0, 24,  5,  0,  0
    .byte    0, 21,  0,  8, 11,  0, 15,  0,  0,  0,  0,  2, 27,  0, 25,  0
    .byte   22,  0, 12,  0,  0,  3, 28,  0, 23,  0,  4, 29,  0,  0, 30, 31
    .size __clzsi2, .-__clzsi2

/* Entry points: numerator in r0, divisor in r1. The plain divide variants
   produce the quotient in r0; the divmod variants produce the quotient in r0
   and the remainder in r1. r2 and r3 are scratch. */
#ifndef __ARM_EABI__
__udivsi3:
    ARMV4_UDIV32_BODY r0, r1, r0, "", r2, r3, __div0_wrap, 1
    .size __udivsi3, . - __udivsi3

__divsi3:
    ARMV4_SDIV32_BODY r0, r1, r0, "", r2, r3, __div0_wrap, 1
    .size __divsi3, . - __divsi3

#else
__aeabi_uidivmod:
    ARMV4_UDIV32_BODY r0, r1, r0, r1, r2, r3, __div0_wrap, 1
    .size __aeabi_uidivmod, . - __aeabi_uidivmod

__aeabi_idivmod:
    ARMV4_SDIV32_BODY r0, r1, r0, r1, r2, r3, __div0_wrap, 1
    .size __aeabi_idivmod, . - __aeabi_idivmod
#endif

#else
/* Entry points: numerator in r0, divisor in r1. The plain divide variants
   produce the quotient in r0; the divmod variants produce the quotient in r0
   and the remainder in r1. r2, r3 and ip are scratch. The signed variants
   also use lr as scratch, so they park the return address at [sp, #-4]
   (without moving sp) and return by loading pc from that slot. */
#ifndef __ARM_EABI__
__udivsi3:
    ARMV5_UDIV32_BODY r0, r1, r0, "", r2, r3, ip, __div0_wrap, 1
    .size __udivsi3, . - __udivsi3

__divsi3:
    str     lr, [sp, #-4]
    ARMV5_SDIV32_BODY r0, r1, r0, "", r2, lr, ip, r3, __div0_wrap_s, "ldr pc, [sp, #-4]"
    .size __divsi3, . - __divsi3

#else
__aeabi_uidivmod:
    ARMV5_UDIV32_BODY r0, r1, r0, r1, r2, r3, ip, __div0_wrap, 1
    .size __aeabi_uidivmod, . - __aeabi_uidivmod

__aeabi_idivmod:
    str     lr, [sp, #-4]
    ARMV5_SDIV32_BODY r0, r1, r0, r1, r2, lr, ip, r3, __div0_wrap_s, "ldr pc, [sp, #-4]"
    .size __aeabi_idivmod, . - __aeabi_idivmod
#endif

/* Reciprocal seeds for the ARMv5 dividers, one byte per value of the top
   seven bits of the normalised divisor (64 entries, indices 64..127 biased
   by -64 in the load offset). */
.L_udiv_est_table:
    .byte 0xff, 0xfc, 0xf8, 0xf4, 0xf0, 0xed, 0xea, 0xe6
    .byte 0xe3, 0xe0, 0xdd, 0xda, 0xd7, 0xd4, 0xd2, 0xcf
    .byte 0xcc, 0xca, 0xc7, 0xc5, 0xc3, 0xc0, 0xbe, 0xbc
    .byte 0xba, 0xb8, 0xb6, 0xb4, 0xb2, 0xb0, 0xae, 0xac
    .byte 0xaa, 0xa8, 0xa7, 0xa5, 0xa3, 0xa2, 0xa0, 0x9f
    .byte 0x9d, 0x9c, 0x9a, 0x99, 0x97, 0x96, 0x94, 0x93
    .byte 0x92, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8a, 0x89
    .byte 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81
#endif