Michael Giacomelli | a16d0f3 | 2007-07-04 17:15:09 +0000 | [diff] [blame] | 1 | //#include "asf.h" |
| 2 | #include "wmadec.h" |
| 3 | #include "wmafixed.h" |
| 4 | #include <codecs.h> |
| 5 | |
| 6 | fixed64 IntTo64(int x){ |
| 7 | fixed64 res = 0; |
| 8 | unsigned char *p = (unsigned char *)&res; |
| 9 | |
| 10 | #ifdef ROCKBOX_BIG_ENDIAN |
| 11 | p[5] = x & 0xff; |
| 12 | p[4] = (x & 0xff00)>>8; |
| 13 | p[3] = (x & 0xff0000)>>16; |
| 14 | p[2] = (x & 0xff000000)>>24; |
| 15 | #else |
| 16 | p[2] = x & 0xff; |
| 17 | p[3] = (x & 0xff00)>>8; |
| 18 | p[4] = (x & 0xff0000)>>16; |
| 19 | p[5] = (x & 0xff000000)>>24; |
| 20 | #endif |
| 21 | return res; |
| 22 | } |
| 23 | |
| 24 | int IntFrom64(fixed64 x) |
| 25 | { |
| 26 | int res = 0; |
| 27 | unsigned char *p = (unsigned char *)&x; |
| 28 | |
| 29 | #ifdef ROCKBOX_BIG_ENDIAN |
| 30 | res = p[5] | (p[4]<<8) | (p[3]<<16) | (p[2]<<24); |
| 31 | #else |
| 32 | res = p[2] | (p[3]<<8) | (p[4]<<16) | (p[5]<<24); |
| 33 | #endif |
| 34 | return res; |
| 35 | } |
| 36 | |
| 37 | fixed32 Fixed32From64(fixed64 x) |
| 38 | { |
| 39 | return x & 0xFFFFFFFF; |
| 40 | } |
| 41 | |
| 42 | fixed64 Fixed32To64(fixed32 x) |
| 43 | { |
| 44 | return (fixed64)x; |
| 45 | } |
| 46 | |
| 47 | |
| 48 | /* |
| 49 | Fixed precision multiply code. |
| 50 | |
| 51 | */ |
| 52 | |
| 53 | /*Sign-15.16 format */ |
| 54 | #ifdef CPU_ARM |
| 55 | /* these are defines in wmafixed.h*/ |
| 56 | |
| 57 | |
| 58 | #elif defined(CPU_COLDFIRE) |
Michael Giacomelli | 51b3bbb | 2007-07-04 17:26:24 +0000 | [diff] [blame^] | 59 | inline int32_t fixmul32(int32_t x, int32_t y) |
Michael Giacomelli | a16d0f3 | 2007-07-04 17:15:09 +0000 | [diff] [blame] | 60 | { |
| 61 | #if PRECISION != 16 |
| 62 | #warning Coldfire fixmul32() only works for PRECISION == 16 |
| 63 | #endif |
| 64 | int32_t t1; |
| 65 | asm ( |
| 66 | "mac.l %[x], %[y], %%acc0 \n" /* multiply */ |
| 67 | "mulu.l %[y], %[x] \n" /* get lower half, avoid emac stall */ |
| 68 | "movclr.l %%acc0, %[t1] \n" /* get higher half */ |
| 69 | "lsr.l #1, %[t1] \n" |
| 70 | "move.w %[t1], %[x] \n" |
| 71 | "swap %[x] \n" |
| 72 | : /* outputs */ |
| 73 | [t1]"=&d"(t1), |
| 74 | [x] "+d" (x) |
| 75 | : /* inputs */ |
| 76 | [y] "d" (y) |
| 77 | ); |
| 78 | return x; |
| 79 | } |
| 80 | #else |
| 81 | |
| 82 | fixed32 fixmul32(fixed32 x, fixed32 y) |
| 83 | { |
| 84 | fixed64 temp; |
| 85 | temp = x; |
| 86 | temp *= y; |
| 87 | |
| 88 | temp >>= PRECISION; |
| 89 | |
| 90 | return (fixed32)temp; |
| 91 | } |
| 92 | |
| 93 | |
| 94 | |
| 95 | /* |
| 96 | Special fixmul32 that does a 16.16 x 1.31 multiply that returns a 16.16 value. |
| 97 | this is needed because the fft constants are all normalized to be less then 1 |
| 98 | and can't fit into a 16 bit number without excessive rounding |
| 99 | |
| 100 | |
| 101 | */ |
| 102 | |
| 103 | fixed32 fixmul32b(fixed32 x, fixed32 y) |
| 104 | { |
| 105 | fixed64 temp; |
| 106 | |
| 107 | temp = x; |
| 108 | temp *= y; |
| 109 | |
| 110 | temp >>= 31; //16+31-16 = 31 bits |
| 111 | |
| 112 | return (fixed32)temp; |
| 113 | } |
| 114 | |
| 115 | #endif |
| 116 | |
| 117 | |
| 118 | /* |
| 119 | Not performance senstitive code here |
| 120 | |
| 121 | */ |
| 122 | |
| 123 | |
| 124 | fixed64 fixmul64byfixed(fixed64 x, fixed32 y) |
| 125 | { |
| 126 | |
| 127 | //return x * y; |
| 128 | return (x * y); |
| 129 | // return (fixed64) fixmul32(Fixed32From64(x),y); |
| 130 | } |
| 131 | |
| 132 | |
| 133 | fixed32 fixdiv32(fixed32 x, fixed32 y) |
| 134 | { |
| 135 | fixed64 temp; |
| 136 | |
| 137 | if(x == 0) |
| 138 | return 0; |
| 139 | if(y == 0) |
| 140 | return 0x7fffffff; |
| 141 | temp = x; |
| 142 | temp <<= PRECISION; |
| 143 | return (fixed32)(temp / y); |
| 144 | } |
| 145 | |
| 146 | fixed64 fixdiv64(fixed64 x, fixed64 y) |
| 147 | { |
| 148 | fixed64 temp; |
| 149 | |
| 150 | if(x == 0) |
| 151 | return 0; |
| 152 | if(y == 0) |
| 153 | return 0x07ffffffffffffffLL; |
| 154 | temp = x; |
| 155 | temp <<= PRECISION64; |
| 156 | return (fixed64)(temp / y); |
| 157 | } |
| 158 | |
| 159 | fixed32 fixsqrt32(fixed32 x) |
| 160 | { |
| 161 | |
| 162 | unsigned long r = 0, s, v = (unsigned long)x; |
| 163 | |
| 164 | #define STEP(k) s = r + (1 << k * 2); r >>= 1; \ |
| 165 | if (s <= v) { v -= s; r |= (1 << k * 2); } |
| 166 | |
| 167 | STEP(15); |
| 168 | STEP(14); |
| 169 | STEP(13); |
| 170 | STEP(12); |
| 171 | STEP(11); |
| 172 | STEP(10); |
| 173 | STEP(9); |
| 174 | STEP(8); |
| 175 | STEP(7); |
| 176 | STEP(6); |
| 177 | STEP(5); |
| 178 | STEP(4); |
| 179 | STEP(3); |
| 180 | STEP(2); |
| 181 | STEP(1); |
| 182 | STEP(0); |
| 183 | |
| 184 | return (fixed32)(r << (PRECISION / 2)); |
| 185 | } |
| 186 | |
| 187 | |
| 188 | |
| 189 | /* Inverse gain of circular cordic rotation in s0.31 format. */ |
| 190 | static const long cordic_circular_gain = 0xb2458939; /* 0.607252929 */ |
| 191 | |
| 192 | /* Table of values of atan(2^-i) in 0.32 format fractions of pi where pi = 0xffffffff / 2 */ |
| 193 | static const unsigned long atan_table[] = { |
| 194 | 0x1fffffff, /* +0.785398163 (or pi/4) */ |
| 195 | 0x12e4051d, /* +0.463647609 */ |
| 196 | 0x09fb385b, /* +0.244978663 */ |
| 197 | 0x051111d4, /* +0.124354995 */ |
| 198 | 0x028b0d43, /* +0.062418810 */ |
| 199 | 0x0145d7e1, /* +0.031239833 */ |
| 200 | 0x00a2f61e, /* +0.015623729 */ |
| 201 | 0x00517c55, /* +0.007812341 */ |
| 202 | 0x0028be53, /* +0.003906230 */ |
| 203 | 0x00145f2e, /* +0.001953123 */ |
| 204 | 0x000a2f98, /* +0.000976562 */ |
| 205 | 0x000517cc, /* +0.000488281 */ |
| 206 | 0x00028be6, /* +0.000244141 */ |
| 207 | 0x000145f3, /* +0.000122070 */ |
| 208 | 0x0000a2f9, /* +0.000061035 */ |
| 209 | 0x0000517c, /* +0.000030518 */ |
| 210 | 0x000028be, /* +0.000015259 */ |
| 211 | 0x0000145f, /* +0.000007629 */ |
| 212 | 0x00000a2f, /* +0.000003815 */ |
| 213 | 0x00000517, /* +0.000001907 */ |
| 214 | 0x0000028b, /* +0.000000954 */ |
| 215 | 0x00000145, /* +0.000000477 */ |
| 216 | 0x000000a2, /* +0.000000238 */ |
| 217 | 0x00000051, /* +0.000000119 */ |
| 218 | 0x00000028, /* +0.000000060 */ |
| 219 | 0x00000014, /* +0.000000030 */ |
| 220 | 0x0000000a, /* +0.000000015 */ |
| 221 | 0x00000005, /* +0.000000007 */ |
| 222 | 0x00000002, /* +0.000000004 */ |
| 223 | 0x00000001, /* +0.000000002 */ |
| 224 | 0x00000000, /* +0.000000001 */ |
| 225 | 0x00000000, /* +0.000000000 */ |
| 226 | }; |
| 227 | |
| 228 | |
| 229 | /* |
| 230 | |
| 231 | Below here functions do not use standard fixed precision! |
| 232 | */ |
| 233 | |
| 234 | |
| 235 | /** |
| 236 | * Implements sin and cos using CORDIC rotation. |
| 237 | * |
| 238 | * @param phase has range from 0 to 0xffffffff, representing 0 and |
| 239 | * 2*pi respectively. |
| 240 | * @param cos return address for cos |
| 241 | * @return sin of phase, value is a signed value from LONG_MIN to LONG_MAX, |
| 242 | * representing -1 and 1 respectively. |
| 243 | * |
| 244 | * Gives at least 24 bits precision (last 2-8 bits or so are probably off) |
| 245 | */ |
| 246 | long fsincos(unsigned long phase, fixed32 *cos) |
| 247 | { |
| 248 | int32_t x, x1, y, y1; |
| 249 | unsigned long z, z1; |
| 250 | int i; |
| 251 | |
| 252 | /* Setup initial vector */ |
| 253 | x = cordic_circular_gain; |
| 254 | y = 0; |
| 255 | z = phase; |
| 256 | |
| 257 | /* The phase has to be somewhere between 0..pi for this to work right */ |
| 258 | if (z < 0xffffffff / 4) { |
| 259 | /* z in first quadrant, z += pi/2 to correct */ |
| 260 | x = -x; |
| 261 | z += 0xffffffff / 4; |
| 262 | } else if (z < 3 * (0xffffffff / 4)) { |
| 263 | /* z in third quadrant, z -= pi/2 to correct */ |
| 264 | z -= 0xffffffff / 4; |
| 265 | } else { |
| 266 | /* z in fourth quadrant, z -= 3pi/2 to correct */ |
| 267 | x = -x; |
| 268 | z -= 3 * (0xffffffff / 4); |
| 269 | } |
| 270 | |
| 271 | /* Each iteration adds roughly 1-bit of extra precision */ |
| 272 | for (i = 0; i < 31; i++) { |
| 273 | x1 = x >> i; |
| 274 | y1 = y >> i; |
| 275 | z1 = atan_table[i]; |
| 276 | |
| 277 | /* Decided which direction to rotate vector. Pivot point is pi/2 */ |
| 278 | if (z >= 0xffffffff / 4) { |
| 279 | x -= y1; |
| 280 | y += x1; |
| 281 | z -= z1; |
| 282 | } else { |
| 283 | x += y1; |
| 284 | y -= x1; |
| 285 | z += z1; |
| 286 | } |
| 287 | } |
| 288 | |
| 289 | if (cos) |
| 290 | *cos = x; |
| 291 | |
| 292 | return y; |
| 293 | } |
| 294 | |
| 295 | |
| 296 | /* |
| 297 | Old trig functions. Still used in 1 place each. |
| 298 | |
| 299 | */ |
| 300 | |
| 301 | fixed32 fixsin32(fixed32 x) |
| 302 | { |
| 303 | |
| 304 | fixed64 x2, temp; |
| 305 | int sign = 1; |
| 306 | |
| 307 | if(x < 0) |
| 308 | { |
| 309 | sign = -1; |
| 310 | x = -x; |
| 311 | } |
| 312 | while (x > 0x19220) |
| 313 | { |
| 314 | x -= M_PI_F; |
| 315 | sign = -sign; |
| 316 | } |
| 317 | if (x > 0x19220) |
| 318 | { |
| 319 | x = M_PI_F - x; |
| 320 | } |
| 321 | x2 = (fixed64)x * x; |
| 322 | x2 >>= PRECISION; |
| 323 | if(sign != 1) |
| 324 | { |
| 325 | x = -x; |
| 326 | } |
| 327 | /** |
| 328 | temp = ftofix32(-.0000000239f) * x2; |
| 329 | temp >>= PRECISION; |
| 330 | **/ |
| 331 | temp = 0; // PJJ |
| 332 | //temp = (temp + 0x0) * x2; //MGG: this can't possibly do anything? |
| 333 | //temp >>= PRECISION; |
| 334 | temp = (temp - 0xd) * x2; |
| 335 | temp >>= PRECISION; |
| 336 | temp = (temp + 0x222) * x2; |
| 337 | temp >>= PRECISION; |
| 338 | temp = (temp - 0x2aab) * x2; |
| 339 | temp >>= PRECISION; |
| 340 | temp += 0x10000; |
| 341 | temp = temp * x; |
| 342 | temp >>= PRECISION; |
| 343 | |
| 344 | return (fixed32)(temp); |
| 345 | } |
| 346 | |
| 347 | fixed32 fixcos32(fixed32 x) |
| 348 | { |
| 349 | return fixsin32(x - (M_PI_F>>1))*-1; |
| 350 | } |