blob: de192e35498718d56a76be1df415bcae4be73df7 [file] [log] [blame]
Dave Chapmanc9d66562006-08-07 22:11:07 +00001/*
2 * idct.c
3 * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
5 *
6 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
7 * See http://libmpeg2.sourceforge.net/ for updates.
8 *
9 * mpeg2dec is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * mpeg2dec is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23
24#include "plugin.h"
25
26#include "mpeg2dec_config.h"
27
28#include "mpeg2.h"
29#include "attributes.h"
30#include "mpeg2_internal.h"
31
/*
 * IDCT entry points. Both pointers are assigned by mpeg2_idct_init() and
 * must not be called before it has run.
 */

/* Transform `block` and store the clipped result into `dest`. */
void (*mpeg2_idct_copy) (int16_t *block, uint8_t *dest, int stride);

/* Transform `block` and add the result onto the pixels already in `dest`. */
void (*mpeg2_idct_add) (int last, int16_t *block, uint8_t *dest, int stride);
36
Jens Arnoldfc43b9d2007-10-16 22:55:40 +000037#ifdef CPU_COLDFIRE
/* ColdFire IDCT routines, implemented in assembler outside this file. */
void mpeg2_idct_copy_coldfire (int16_t *block, uint8_t *dest, int stride);
void mpeg2_idct_add_coldfire (int last, int16_t *block, uint8_t *dest,
                              int stride);
Jens Arnold45d43452007-10-17 00:29:44 +000043
44#elif defined CPU_ARM
/* ARM IDCT routines, implemented in assembler outside this file. */
void mpeg2_idct_copy_arm (int16_t *block, uint8_t *dest, int stride);
void mpeg2_idct_add_arm (int last, int16_t *block, uint8_t *dest,
                         int stride);
50
#else /* !CPU_COLDFIRE, !CPU_ARM */
52
/*
 * Fixed-point IDCT weights: Wk = round (2048 * sqrt (2) * cos (k * pi / 16)).
 * W4 is not needed; the 1/sqrt(2) factor is applied separately as 181/256.
 */
enum {
    W1 = 2841,  /* k = 1 */
    W2 = 2676,  /* k = 2 */
    W3 = 2408,  /* k = 3 */
    W5 = 1609,  /* k = 5 */
    W6 = 1108,  /* k = 6 */
    W7 = 565    /* k = 7 */
};
59
60/*
61 * In legal streams, the IDCT output should be between -384 and +384.
62 * In corrupted streams, it is possible to force the IDCT output to go
63 * to +-3826 - this is the worst case for a column IDCT where the
64 * column inputs are 16-bit values.
65 */
66uint8_t mpeg2_clip[3840 * 2 + 256] IBSS_ATTR;
67#define CLIP(i) ((mpeg2_clip + 3840)[i])
Jens Arnoldfc43b9d2007-10-16 22:55:40 +000068
/*
 * Rotation ("butterfly") step:
 *
 *     t0 = wa * d0 + wb * d1
 *     t1 = wa * d1 - wb * d0
 *
 * computed with three multiplications instead of four by sharing the
 * product wa * (d0 + d1).
 *
 * t0 and t1 must be assignable int lvalues. Every argument is fully
 * parenthesized, so arbitrary expressions may be passed, but wa, wb, d0 and
 * d1 are each evaluated more than once and must be free of side effects.
 * t0 is assigned before t1 is computed, so t0 must not alias d0.
 */
#define BUTTERFLY(t0,t1,wa,wb,d0,d1) \
    do { \
        const int butterfly_shared_ = (wa) * ((d0) + (d1)); \
        (t0) = butterfly_shared_ + ((wb) - (wa)) * (d1); \
        (t1) = butterfly_shared_ - ((wb) + (wa)) * (d0); \
    } while (0)
83
84static inline void idct_row (int16_t * const block)
85{
86 int d0, d1, d2, d3;
87 int a0, a1, a2, a3, b0, b1, b2, b3;
88 int t0, t1, t2, t3;
89
90 /* shortcut */
91 if (likely (!(block[1] | ((int32_t *)block)[1] | ((int32_t *)block)[2] |
92 ((int32_t *)block)[3]))) {
93 uint32_t tmp = (uint16_t) (block[0] >> 1);
94 tmp |= tmp << 16;
95 ((int32_t *)block)[0] = tmp;
96 ((int32_t *)block)[1] = tmp;
97 ((int32_t *)block)[2] = tmp;
98 ((int32_t *)block)[3] = tmp;
99 return;
100 }
101
102 d0 = (block[0] << 11) + 2048;
103 d1 = block[1];
104 d2 = block[2] << 11;
105 d3 = block[3];
106 t0 = d0 + d2;
107 t1 = d0 - d2;
108 BUTTERFLY (t2, t3, W6, W2, d3, d1);
109 a0 = t0 + t2;
110 a1 = t1 + t3;
111 a2 = t1 - t3;
112 a3 = t0 - t2;
113
114 d0 = block[4];
115 d1 = block[5];
116 d2 = block[6];
117 d3 = block[7];
118 BUTTERFLY (t0, t1, W7, W1, d3, d0);
119 BUTTERFLY (t2, t3, W3, W5, d1, d2);
120 b0 = t0 + t2;
121 b3 = t1 + t3;
122 t0 -= t2;
123 t1 -= t3;
124 b1 = ((t0 + t1) >> 8) * 181;
125 b2 = ((t0 - t1) >> 8) * 181;
126
127 block[0] = (a0 + b0) >> 12;
128 block[1] = (a1 + b1) >> 12;
129 block[2] = (a2 + b2) >> 12;
130 block[3] = (a3 + b3) >> 12;
131 block[4] = (a3 - b3) >> 12;
132 block[5] = (a2 - b2) >> 12;
133 block[6] = (a1 - b1) >> 12;
134 block[7] = (a0 - b0) >> 12;
135}
136
137static inline void idct_col (int16_t * const block)
138{
139 int d0, d1, d2, d3;
140 int a0, a1, a2, a3, b0, b1, b2, b3;
141 int t0, t1, t2, t3;
142
143 d0 = (block[8*0] << 11) + 65536;
144 d1 = block[8*1];
145 d2 = block[8*2] << 11;
146 d3 = block[8*3];
147 t0 = d0 + d2;
148 t1 = d0 - d2;
149 BUTTERFLY (t2, t3, W6, W2, d3, d1);
150 a0 = t0 + t2;
151 a1 = t1 + t3;
152 a2 = t1 - t3;
153 a3 = t0 - t2;
154
155 d0 = block[8*4];
156 d1 = block[8*5];
157 d2 = block[8*6];
158 d3 = block[8*7];
159 BUTTERFLY (t0, t1, W7, W1, d3, d0);
160 BUTTERFLY (t2, t3, W3, W5, d1, d2);
161 b0 = t0 + t2;
162 b3 = t1 + t3;
163 t0 -= t2;
164 t1 -= t3;
165 b1 = ((t0 + t1) >> 8) * 181;
166 b2 = ((t0 - t1) >> 8) * 181;
167
168 block[8*0] = (a0 + b0) >> 17;
169 block[8*1] = (a1 + b1) >> 17;
170 block[8*2] = (a2 + b2) >> 17;
171 block[8*3] = (a3 + b3) >> 17;
172 block[8*4] = (a3 - b3) >> 17;
173 block[8*5] = (a2 - b2) >> 17;
174 block[8*6] = (a1 - b1) >> 17;
175 block[8*7] = (a0 - b0) >> 17;
176}
177
/*
 * Portable C implementation behind mpeg2_idct_copy.
 *
 * Runs the row pass over all eight rows of the 8x8 `block`, then the column
 * pass over all eight columns, and writes the clipped result to `dest`,
 * eight bytes per row with consecutive rows `stride` bytes apart.
 * Each row of `block` is cleared to zero once it has been written out, so
 * the block is all zero on return.
 *
 * The rows are cleared with int16_t stores: clearing them through an
 * int32_t pointer violates the aliasing rules and would let the compiler
 * reorder the stores against the int16_t reads of the same row.
 */
static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest,
                               const int stride)
{
    int i;

    for (i = 0; i < 8; i++)
        idct_row (block + 8 * i);
    for (i = 0; i < 8; i++)
        idct_col (block + i);

    for (i = 0; i < 8; i++) {
        dest[0] = CLIP (block[0]);
        dest[1] = CLIP (block[1]);
        dest[2] = CLIP (block[2]);
        dest[3] = CLIP (block[3]);
        dest[4] = CLIP (block[4]);
        dest[5] = CLIP (block[5]);
        dest[6] = CLIP (block[6]);
        dest[7] = CLIP (block[7]);

        block[0] = 0; block[1] = 0; block[2] = 0; block[3] = 0;
        block[4] = 0; block[5] = 0; block[6] = 0; block[7] = 0;

        dest += stride;
        block += 8;
    }
}
204
/*
 * Portable C implementation behind mpeg2_idct_add.
 *
 * Adds the inverse transform of the 8x8 `block` onto the pixels already in
 * `dest` (eight bytes per row, rows `stride` bytes apart), clipping each sum
 * to 0..255.
 *
 * Two paths:
 *  - Full transform: taken when last != 129, or when bits 4..6 of block[0]
 *    equal binary 100. Every row of `block` is cleared after use.
 *  - DC only: otherwise. The single value (block[0] + 64) >> 7 is added to
 *    all 64 pixels, and only block[0] and block[63] are cleared.
 *    NOTE(review): presumably last == 129 is how the caller signals that
 *    only those two entries can be non-zero - confirm against the caller.
 *
 * Rows are cleared with int16_t stores: clearing them through an int32_t
 * pointer violates the aliasing rules and would let the compiler reorder
 * the stores against the int16_t reads of the same row.
 */
static void mpeg2_idct_add_c (const int last, int16_t * block,
                              uint8_t * dest, const int stride)
{
    int i;

    if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) {
        for (i = 0; i < 8; i++)
            idct_row (block + 8 * i);
        for (i = 0; i < 8; i++)
            idct_col (block + i);

        for (i = 0; i < 8; i++) {
            dest[0] = CLIP (block[0] + dest[0]);
            dest[1] = CLIP (block[1] + dest[1]);
            dest[2] = CLIP (block[2] + dest[2]);
            dest[3] = CLIP (block[3] + dest[3]);
            dest[4] = CLIP (block[4] + dest[4]);
            dest[5] = CLIP (block[5] + dest[5]);
            dest[6] = CLIP (block[6] + dest[6]);
            dest[7] = CLIP (block[7] + dest[7]);

            block[0] = 0; block[1] = 0; block[2] = 0; block[3] = 0;
            block[4] = 0; block[5] = 0; block[6] = 0; block[7] = 0;

            dest += stride;
            block += 8;
        }
    } else {
        /* +64 is half of 1 << 7 and rounds the shift */
        const int DC = (block[0] + 64) >> 7;

        block[0] = block[63] = 0;
        for (i = 0; i < 8; i++) {
            dest[0] = CLIP (DC + dest[0]);
            dest[1] = CLIP (DC + dest[1]);
            dest[2] = CLIP (DC + dest[2]);
            dest[3] = CLIP (DC + dest[3]);
            dest[4] = CLIP (DC + dest[4]);
            dest[5] = CLIP (DC + dest[5]);
            dest[6] = CLIP (DC + dest[6]);
            dest[7] = CLIP (DC + dest[7]);
            dest += stride;
        }
    }
}
250
Jens Arnold45d43452007-10-17 00:29:44 +0000251#endif /* CPU selection */
Jens Arnoldfc43b9d2007-10-16 22:55:40 +0000252
Michael Sevakisf4b5a722007-04-14 16:35:44 +0000253void mpeg2_idct_init (void)
Dave Chapmanc9d66562006-08-07 22:11:07 +0000254{
Robert Kuklafd3fe452007-10-09 20:42:20 +0000255 extern uint8_t default_mpeg2_scan_norm[64];
256 extern uint8_t default_mpeg2_scan_alt[64];
Michael Sevakisf4b5a722007-04-14 16:35:44 +0000257 extern uint8_t mpeg2_scan_norm[64];
258 extern uint8_t mpeg2_scan_alt[64];
259 int i, j;
Dave Chapmanc9d66562006-08-07 22:11:07 +0000260
Jens Arnoldfc43b9d2007-10-16 22:55:40 +0000261#ifdef CPU_COLDFIRE
262 mpeg2_idct_copy = mpeg2_idct_copy_coldfire;
263 mpeg2_idct_add = mpeg2_idct_add_coldfire;
Jens Arnold45d43452007-10-17 00:29:44 +0000264#elif defined CPU_ARM
265 mpeg2_idct_copy = mpeg2_idct_copy_arm;
266 mpeg2_idct_add = mpeg2_idct_add_arm;
Jens Arnoldfc43b9d2007-10-16 22:55:40 +0000267#else
Michael Sevakisf4b5a722007-04-14 16:35:44 +0000268 mpeg2_idct_copy = mpeg2_idct_copy_c;
Jens Arnoldfc43b9d2007-10-16 22:55:40 +0000269 mpeg2_idct_add = mpeg2_idct_add_c;
Dave Chapmanc9d66562006-08-07 22:11:07 +0000270
Michael Sevakisf4b5a722007-04-14 16:35:44 +0000271 for (i = -3840; i < 3840 + 256; i++)
272 CLIP(i) = (i < 0) ? 0 : ((i > 255) ? 255 : i);
Jens Arnoldb888fb22006-08-08 22:56:35 +0000273#endif
Michael Sevakisf4b5a722007-04-14 16:35:44 +0000274
275 for (i = 0; i < 64; i++)
276 {
Robert Kuklafd3fe452007-10-09 20:42:20 +0000277 j = default_mpeg2_scan_norm[i];
Michael Sevakisf4b5a722007-04-14 16:35:44 +0000278 mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
279
Robert Kuklafd3fe452007-10-09 20:42:20 +0000280 j = default_mpeg2_scan_alt[i];
Michael Sevakisf4b5a722007-04-14 16:35:44 +0000281 mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
Dave Chapmanc9d66562006-08-07 22:11:07 +0000282 }
283}