/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 * $Id $
 *
 * Copyright (C) 2007 by Michael Sevakis
 *
 * All files in this archive are subject to the GNU General Public License.
 * See the file COPYING in the source tree root for full license agreement.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/
| 19 | |
/* Symbols exported from this file; both are marked as function symbols. */
    .global mpeg2_idct_copy_arm
    .global mpeg2_idct_add_arm
    .type   mpeg2_idct_copy_arm, %function
    .type   mpeg2_idct_add_arm, %function
| 24 | |
| 25 | |
/*
 * .idct - in-place 8x8 inverse DCT on 64 signed 16-bit coefficients
 * (128 bytes), performed as a row pass followed by a column pass.
 * All constant multiplies are built from shift/add sequences.
 *
 * Custom calling convention (internal helper, not a C-callable function):
 *   In:       r0 = block pointer
 *   Out:      r0 = block pointer (same value as on entry; non-volatile)
 *   Clobbers: r1-r12 and the condition flags
 *   Return:   bx lr; the stack is not touched here
 * All non-volatile C context must be saved and restored by the caller on
 * this routine's behalf.
 */
.idct:
    add     r12, r0, #128           /* r12 = end of block (8 rows * 16 bytes) */

    /* ---- Row pass: one row of eight coefficients per iteration ---- */
1:
    ldrsh   r1, [r0, #0]            /* d0 */
    ldrsh   r2, [r0, #2]            /* d1 */
    ldrsh   r3, [r0, #4]            /* d2 */
    ldrsh   r4, [r0, #6]            /* d3 */
    ldrsh   r5, [r0, #8]            /* d0 */
    ldrsh   r6, [r0, #10]           /* d1 */
    ldrsh   r7, [r0, #12]           /* d2 */
    ldrsh   r8, [r0, #14]           /* d3 */
    orrs    r9, r2, r3              /* are block[1..7] all zero? */
    orreqs  r9, r4, r5
    orreqs  r9, r6, r7
    cmpeq   r8, #0
    bne     2f                      /* no: do the full row transform */
    /* Shortcut: fill the row with the low 16 bits of (block[0] >> 1) */
    mov     r1, r1, asl #15         /* upper halfword = block[0] >> 1 */
    bic     r1, r1, #0x8000         /* drop bit 0 of block[0] left in bit 15 */
    orr     r1, r1, r1, lsr #16     /* duplicate into the lower halfword */
    str     r1, [r0], #4
    str     r1, [r0], #4
    str     r1, [r0], #4
    str     r1, [r0], #4
    cmp     r0, r12
    blo     1b
    b       3f
2:
    mov     r1, r1, asl #11         /* r1 = d0 = (block[0] << 11) + 2048 */
    add     r1, r1, #2048
    add     r1, r1, r3, asl #11     /* r1 = t0 = d0 + (block[2] << 11) */
    sub     r3, r1, r3, asl #12     /* r3 = t1 = d0 - (block[2] << 11) */

    add     r9, r2, r4              /* r9 = tmp = (d1+d3)*(1108/4) */
    add     r10, r9, r9, asl #2
    add     r10, r10, r9, asl #4
    add     r9, r10, r9, asl #8

    add     r10, r2, r2, asl #4     /* r2 = t2 = tmp + (d1*(1568/32)*8) */
    add     r2, r10, r2, asl #5
    add     r2, r9, r2, asl #3

    add     r10, r4, r4, asl #2     /* r4 = t3 = tmp - (d3*(3784/8)*2) */
    rsb     r10, r10, r4, asl #6
    add     r4, r4, r10, asl #3
    sub     r4, r9, r4, asl #1
    /* t2 & t3 are 1/4 final value here */
    add     r1, r1, r2, asl #2      /* r1 = a0 = t0 + t2 */
    sub     r2, r1, r2, asl #3      /* r2 = a3 = t0 - t2 */
    add     r3, r3, r4, asl #2      /* r3 = a1 = t1 + t3 */
    sub     r4, r3, r4, asl #3      /* r4 = a2 = t1 - t3 */

    add     r9, r8, r5              /* r9 = tmp = 565*(d3 + d0) */
    add     r10, r9, r9, asl #4
    add     r10, r10, r10, asl #5
    add     r9, r10, r9, asl #2

    add     r10, r5, r5, asl #4     /* r5 = t0 = tmp + (((2276/4)*d0)*4) */
    add     r10, r10, r10, asl #5
    add     r5, r10, r5, asl #3
    add     r5, r9, r5, asl #2

    add     r10, r8, r8, asl #2     /* r8 = t1 = tmp - (((3406/2)*d3)*2) */
    add     r10, r10, r10, asl #4
    add     r10, r10, r8, asl #7
    rsb     r8, r8, r10, asl #3
    sub     r8, r9, r8, asl #1

    add     r9, r6, r7              /* r9 = tmp = (2408/8)*(d1 + d2) */
    add     r10, r9, r9, asl #3
    add     r10, r10, r10, asl #5
    add     r9, r10, r9, asl #2

    add     r10, r7, r7, asl #3     /* r7 = t2 = (tmp*8) - 799*d2 */
    add     r10, r10, r7, asl #4
    rsb     r7, r7, r10, asl #5
    rsb     r7, r7, r9, asl #3

    sub     r10, r6, r6, asl #4     /* r6 = t3 = (tmp*8) - 4017*d1 */
    sub     r10, r10, r6, asl #6
    add     r10, r10, r6, asl #12   /* r10 = (1 - 16 - 64 + 4096)*d1 = 4017*d1 */
    rsb     r6, r10, r9, asl #3     /* subtract r10 as is: adding d1 again gives 4018 */
    /* t0 = r5, t1 = r8, t2 = r7, t3 = r6 */
    add     r9, r5, r7              /* r9 = b0 = t0 + t2 */
    add     r10, r8, r6             /* r10 = b3 = t1 + t3 */
    sub     r5, r5, r7              /* t0 -= t2 */
    sub     r8, r8, r6              /* t1 -= t3 */
    add     r6, r5, r8              /* r6 = t0 + t1 */
    sub     r7, r5, r8              /* r7 = t0 - t1 */

    add     r11, r6, r6, asr #2     /* r6 = b1*2 = r6*(181/128) */
    add     r11, r11, r11, asr #5
    add     r6, r11, r6, asr #3
    add     r11, r7, r7, asr #2     /* r7 = b2*2 = r7*(181/128) */
    add     r11, r11, r11, asr #5
    add     r7, r11, r7, asr #3
    /* r1 = a0, r3 = a1, r4 = a2, r2 = a3 */
    /* r9 = b0, r6 = b1*2, r7 = b2*2, r10 = b3 */
    add     r5, r1, r9              /* block[0] = (a0 + b0) >> 12 */
    mov     r5, r5, asr #12
    strh    r5, [r0], #2
    add     r8, r3, r6, asr #1      /* block[1] = (a1 + b1) >> 12 */
    mov     r8, r8, asr #12
    strh    r8, [r0], #2
    add     r5, r4, r7, asr #1      /* block[2] = (a2 + b2) >> 12 */
    mov     r5, r5, asr #12
    strh    r5, [r0], #2
    add     r8, r2, r10             /* block[3] = (a3 + b3) >> 12 */
    mov     r8, r8, asr #12
    strh    r8, [r0], #2
    sub     r5, r2, r10             /* block[4] = (a3 - b3) >> 12 */
    mov     r5, r5, asr #12
    strh    r5, [r0], #2
    sub     r8, r4, r7, asr #1      /* block[5] = (a2 - b2) >> 12 */
    mov     r8, r8, asr #12
    strh    r8, [r0], #2
    sub     r5, r3, r6, asr #1      /* block[6] = (a1 - b1) >> 12 */
    mov     r5, r5, asr #12
    strh    r5, [r0], #2
    sub     r8, r1, r9              /* block[7] = (a0 - b0) >> 12 */
    mov     r8, r8, asr #12
    strh    r8, [r0], #2
    cmp     r0, r12
    blo     1b

    /* ---- Column pass: one column per iteration, row stride 16 bytes ---- */
3:
    sub     r0, r0, #128            /* rewind to the start of the block */
    add     r12, r0, #16            /* r12 = end of the first row (8 columns) */
4:
    ldrsh   r1, [r0, #0*8]          /* d0 */
    ldrsh   r2, [r0, #2*8]          /* d1 */
    ldrsh   r3, [r0, #4*8]          /* d2 */
    ldrsh   r4, [r0, #6*8]          /* d3 */
    ldrsh   r5, [r0, #8*8]          /* d0 */
    ldrsh   r6, [r0, #10*8]         /* d1 */
    ldrsh   r7, [r0, #12*8]         /* d2 */
    ldrsh   r8, [r0, #14*8]         /* d3 */

    mov     r1, r1, asl #11         /* r1 = d0 = (block[8*0] << 11) + 65536 */
    add     r1, r1, #65536
    add     r1, r1, r3, asl #11     /* r1 = t0 = d0 + d2:(block[8*2] << 11) */
    sub     r3, r1, r3, asl #12     /* r3 = t1 = d0 - d2:(block[8*2] << 11) */

    add     r9, r2, r4              /* r9 = tmp = (d1+d3)*(1108/4) */
    add     r10, r9, r9, asl #2
    add     r10, r10, r9, asl #4
    add     r9, r10, r9, asl #8

    add     r11, r2, r2, asl #4     /* r2 = t2 = tmp + (d1*(1568/32)*8) */
    add     r2, r11, r2, asl #5
    add     r2, r9, r2, asl #3

    add     r10, r4, r4, asl #2     /* r4 = t3 = tmp - (d3*(3784/8)*2) */
    rsb     r10, r10, r4, asl #6
    add     r4, r4, r10, asl #3
    sub     r4, r9, r4, asl #1
    /* t2 & t3 are 1/4 final value here */
    add     r1, r1, r2, asl #2      /* r1 = a0 = t0 + t2 */
    sub     r2, r1, r2, asl #3      /* r2 = a3 = t0 - t2 */
    add     r3, r3, r4, asl #2      /* r3 = a1 = t1 + t3 */
    sub     r4, r3, r4, asl #3      /* r4 = a2 = t1 - t3 */

    add     r9, r8, r5              /* r9 = tmp = 565*(d3 + d0) */
    add     r10, r9, r9, asl #4
    add     r10, r10, r10, asl #5
    add     r9, r10, r9, asl #2

    add     r10, r5, r5, asl #4     /* r5 = t0 = tmp + (((2276/4)*d0)*4) */
    add     r10, r10, r10, asl #5
    add     r5, r10, r5, asl #3
    add     r5, r9, r5, asl #2

    add     r10, r8, r8, asl #2     /* r8 = t1 = tmp - (((3406/2)*d3)*2) */
    add     r10, r10, r10, asl #4
    add     r10, r10, r8, asl #7
    rsb     r8, r8, r10, asl #3
    sub     r8, r9, r8, asl #1

    add     r9, r6, r7              /* r9 = tmp = (2408/8)*(d1 + d2) */
    add     r10, r9, r9, asl #3
    add     r10, r10, r10, asl #5
    add     r9, r10, r9, asl #2

    add     r10, r7, r7, asl #3     /* r7 = t2 = (tmp*8) - 799*d2 */
    add     r10, r10, r7, asl #4
    rsb     r7, r7, r10, asl #5
    rsb     r7, r7, r9, asl #3

    sub     r10, r6, r6, asl #4     /* r6 = t3 = (tmp*8) - 4017*d1 */
    sub     r10, r10, r6, asl #6
    add     r10, r10, r6, asl #12   /* r10 = (1 - 16 - 64 + 4096)*d1 = 4017*d1 */
    rsb     r6, r10, r9, asl #3     /* subtract r10 as is: adding d1 again gives 4018 */
    /* t0 = r5, t1 = r8, t2 = r7, t3 = r6 */
    add     r9, r5, r7              /* r9 = b0 = t0 + t2 */
    add     r10, r8, r6             /* r10 = b3 = t1 + t3 */
    sub     r5, r5, r7              /* t0 -= t2 */
    sub     r8, r8, r6              /* t1 -= t3 */
    add     r6, r5, r8              /* r6 = t0 + t1 */
    sub     r7, r5, r8              /* r7 = t0 - t1 */

    add     r11, r6, r6, asr #2     /* r6 = b1*2 = r6*(181/128) */
    add     r11, r11, r11, asr #5
    add     r6, r11, r6, asr #3
    add     r11, r7, r7, asr #2     /* r7 = b2*2 = r7*(181/128) */
    add     r11, r11, r11, asr #5
    add     r7, r11, r7, asr #3
    /* r1 = a0, r3 = a1, r4 = a2, r2 = a3 */
    /* r9 = b0, r6 = b1*2, r7 = b2*2, r10 = b3 */
    add     r5, r1, r9              /* block[8*0] = (a0 + b0) >> 17 */
    mov     r5, r5, asr #17
    strh    r5, [r0, #0*8]
    add     r8, r3, r6, asr #1      /* block[8*1] = (a1 + b1) >> 17 */
    mov     r8, r8, asr #17
    strh    r8, [r0, #2*8]
    add     r5, r4, r7, asr #1      /* block[8*2] = (a2 + b2) >> 17 */
    mov     r5, r5, asr #17
    strh    r5, [r0, #4*8]
    add     r8, r2, r10             /* block[8*3] = (a3 + b3) >> 17 */
    mov     r8, r8, asr #17
    strh    r8, [r0, #6*8]
    sub     r5, r2, r10             /* block[8*4] = (a3 - b3) >> 17 */
    mov     r5, r5, asr #17
    strh    r5, [r0, #8*8]
    sub     r8, r4, r7, asr #1      /* block[8*5] = (a2 - b2) >> 17 */
    mov     r8, r8, asr #17
    strh    r8, [r0, #10*8]
    sub     r5, r3, r6, asr #1      /* block[8*6] = (a1 - b1) >> 17 */
    mov     r5, r5, asr #17
    strh    r5, [r0, #12*8]
    sub     r8, r1, r9              /* block[8*7] = (a0 - b0) >> 17 */
    mov     r8, r8, asr #17
    strh    r8, [r0, #14*8]
    add     r0, r0, #2              /* next column */
    cmp     r0, r12
    blo     4b
    sub     r0, r0, #16             /* restore r0 = block pointer */
    bx      lr
| 267 | |
/*
 * mpeg2_idct_copy_arm
 *   In:  r0 = block pointer (handed unchanged to .idct)
 *        r1 = destination byte pointer
 *        r2 = byte step from one destination row to the next
 *
 * Runs .idct on the block, writes the 8x8 result saturated to 0..255 to
 * the destination, and clears the block to zero along the way.
 * r4-r12 are saved on entry and restored on return.
 */

    /* Saturate a signed value to 0..255 and store it as byte [r1 + off]. */
    .macro  copy_sat8 val, off
    cmp     \val, #255              /* HI (unsigned) when val < 0 or val > 255 */
    mvnhi   \val, \val, asr #31     /* val < 0 -> 0, val > 255 -> all ones */
    strb    \val, [r1, #\off]       /* only the low byte is written */
    .endm

mpeg2_idct_copy_arm:
    stmfd   sp!, { r1-r2, r4-r12, lr }
    bl      .idct
    ldmfd   sp!, { r1-r2 }          /* reload the r1/r2 arguments saved above */
    mov     r11, #0                 /* zero word used to clear the block */
    add     r12, r0, #128           /* r12 = end of block */
.Lcopy_row:
    /* Fetch the whole row before any of it is cleared */
    ldrsh   r3, [r0, #0]
    ldrsh   r4, [r0, #2]
    ldrsh   r5, [r0, #4]
    ldrsh   r6, [r0, #6]
    ldrsh   r7, [r0, #8]
    ldrsh   r8, [r0, #10]
    ldrsh   r9, [r0, #12]
    ldrsh   r10, [r0, #14]
    copy_sat8 r3, 0
    str     r11, [r0], #4           /* clear coefficients 0-1, advance r0 */
    copy_sat8 r4, 1
    copy_sat8 r5, 2
    str     r11, [r0], #4           /* clear coefficients 2-3 */
    copy_sat8 r6, 3
    copy_sat8 r7, 4
    str     r11, [r0], #4           /* clear coefficients 4-5 */
    copy_sat8 r8, 5
    copy_sat8 r9, 6
    str     r11, [r0], #4           /* clear coefficients 6-7 */
    copy_sat8 r10, 7
    add     r1, r1, r2              /* next destination row */
    cmp     r0, r12
    blo     .Lcopy_row
    ldmfd   sp!, { r4-r12, pc }
| 315 | |
/*
 * mpeg2_idct_add_arm
 *   In:  r0 = selector value (compared against 129 below; its meaning is
 *             defined by the caller - NOTE(review): confirm)
 *        r1 = block pointer
 *        r2 = destination byte pointer
 *        r3 = byte step from one destination row to the next
 *
 * Full path: taken when r0 != 129, or when (block[0] & 0x70) == 0x40.
 *   Runs .idct, adds each result to the destination byte, saturates the
 *   sum to 0..255, stores it back and clears the block to zero.
 * DC-only path: taken otherwise.
 *   Adds (block[0] + 64) >> 7 to every byte of the 8x8 destination area
 *   with the same saturation, and clears block[0] and block[63].
 * Callee-saved registers used on either path are saved and restored.
 */

    /* Saturate a signed value to 0..255 and store it as byte [base + off]. */
    .macro  add_sat8 val, base, off
    cmp     \val, #255              /* HI (unsigned) when val < 0 or val > 255 */
    mvnhi   \val, \val, asr #31     /* val < 0 -> 0, val > 255 -> all ones */
    strb    \val, [\base, #\off]    /* only the low byte is written */
    .endm

mpeg2_idct_add_arm:
    cmp     r0, #129
    mov     r0, r1                  /* r0 = block from here on */
    ldreqsh r1, [r0, #0]            /* only when r0 was 129: r1 = block[0] */
    bne     .Ladd_full
    and     r1, r1, #0x70
    cmp     r1, #0x40
    bne     .Ladd_dc
.Ladd_full:
    stmfd   sp!, { r2-r12, lr }
    bl      .idct
    ldmfd   sp!, { r1-r2 }          /* r1 = destination, r2 = row step */
    mov     r11, #0                 /* zero word used to clear the block */
    add     r12, r0, #128           /* r12 = end of block */
.Ladd_row:
    ldrb    r3, [r1, #0]
    ldrb    r4, [r1, #1]
    ldrb    r5, [r1, #2]
    ldrb    r6, [r1, #3]
    ldrsh   r7, [r0, #0]
    ldrsh   r8, [r0, #2]
    ldrsh   r9, [r0, #4]
    ldrsh   r10, [r0, #6]
    /* First four pixels; operands for the last four are loaded in between */
    add     r7, r7, r3
    ldrb    r3, [r1, #4]
    add_sat8 r7, r1, 0
    ldrsh   r7, [r0, #8]
    add     r8, r8, r4
    ldrb    r4, [r1, #5]
    add_sat8 r8, r1, 1
    ldrsh   r8, [r0, #10]
    add     r9, r9, r5
    ldrb    r5, [r1, #6]
    add_sat8 r9, r1, 2
    ldrsh   r9, [r0, #12]
    add     r10, r10, r6
    ldrb    r6, [r1, #7]
    add_sat8 r10, r1, 3
    ldrsh   r10, [r0, #14]
    /* Every coefficient of the row is in a register now; clear the row */
    str     r11, [r0], #4
    add     r7, r7, r3
    add_sat8 r7, r1, 4
    str     r11, [r0], #4
    add     r8, r8, r4
    add_sat8 r8, r1, 5
    str     r11, [r0], #4
    add     r9, r9, r5
    add_sat8 r9, r1, 6
    add     r10, r10, r6
    add_sat8 r10, r1, 7
    str     r11, [r0], #4
    add     r1, r1, r2              /* next destination row */
    cmp     r0, r12
    blo     .Ladd_row
    ldmfd   sp!, { r4-r12, pc }
.Ladd_dc:
    stmfd   sp!, { r4-r11 }
    ldrsh   r1, [r0, #0]            /* r1 = block[0] */
    mov     r11, #0
    strh    r11, [r0, #0]           /* block[0] = 0 */
    strh    r11, [r0, #126]         /* block[63] = 0 */
    add     r1, r1, #64             /* r1 = block[0] + 64; DC is r1 >> 7 */
    add     r0, r2, r3, asl #3      /* r0 = destination + 8 rows = end mark */
.Ladd_dc_row:
    ldrb    r4, [r2, #0]
    ldrb    r5, [r2, #1]
    ldrb    r6, [r2, #2]
    ldrb    r7, [r2, #3]
    ldrb    r8, [r2, #4]
    ldrb    r9, [r2, #5]
    ldrb    r10, [r2, #6]
    ldrb    r11, [r2, #7]
    add     r4, r4, r1, asr #7
    add_sat8 r4, r2, 0
    add     r5, r5, r1, asr #7
    add_sat8 r5, r2, 1
    add     r6, r6, r1, asr #7
    add_sat8 r6, r2, 2
    add     r7, r7, r1, asr #7
    add_sat8 r7, r2, 3
    add     r8, r8, r1, asr #7
    add_sat8 r8, r2, 4
    add     r9, r9, r1, asr #7
    add_sat8 r9, r2, 5
    add     r10, r10, r1, asr #7
    add_sat8 r10, r2, 6
    add     r11, r11, r1, asr #7
    add_sat8 r11, r2, 7
    add     r2, r2, r3              /* next destination row */
    cmp     r2, r0
    blo     .Ladd_dc_row
    ldmfd   sp!, { r4-r11 }
    bx      lr