/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 * $Id$
 *
 * Copyright (C) 2006-2007 Thom Johansen
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/

/****************************************************************************
 *  void channels_process_sound_chan_mono(int count, int32_t *buf[])
 *
 *  In-place mono downmix: each sample in buf[0] and in buf[1] is replaced
 *  by (buf[0][n] >> 1) + (buf[1][n] >> 1). Both operands are halved before
 *  the addition, so the sum always fits in 32 bits.
 *
 *  In:       r0 = count, r1 = buf (array of the two channel pointers)
 *  Out:      nothing
 *  Scratch:  r0-r3 are modified; r4-r6 and lr are pushed on entry and
 *            restored on return (lr is used as a data register in the loop)
 *
 *  NOTE: The following code processes two samples at once. When count is odd,
 *        there is an additional obsolete sample processed, which will not be
 *        used by the calling functions. The counter is only tested at the
 *        bottom of the loop, so two samples per channel are always touched,
 *        even for count <= 0.
 */
    .section .icode, "ax", %progbits
    .align  2
    .global channels_process_sound_chan_mono
    .type   channels_process_sound_chan_mono, %function
channels_process_sound_chan_mono:
    @ input: r0 = count, r1 = buf
    stmfd   sp!, {r4-r6, lr}
    ldmia   r1, {r2-r3}             @ r2 = buf[0], r3 = buf[1]

.monoloop:
    ldmia   r2, {r4-r5}             @ r4, r5 = next two samples of buf[0]
    ldmia   r3, {r6, lr}            @ r6, lr = next two samples of buf[1]
    mov     r4, r4, asr #1          @ r4 = r4/2
    add     r4, r4, r6, asr #1      @ r4 = r4 + r6/2 = (buf[0]+buf[1])/2
    mov     r5, r5, asr #1          @ r5 = r5/2
    add     r5, r5, lr, asr #1      @ r5 = r5 + lr/2 = (buf[0]+buf[1])/2
    stmia   r2!, {r4-r5}            @ same pair goes to buf[0] ...
    stmia   r3!, {r4-r5}            @ ... and to buf[1]; both pointers advance
    subs    r0, r0, #2              @ two samples done
    bgt     .monoloop

    ldmfd   sp!, {r4-r6, pc}
.monoend:
    .size   channels_process_sound_chan_mono,.monoend-channels_process_sound_chan_mono

/****************************************************************************
 *  void channels_process_sound_chan_karaoke(int count, int32_t *buf[])
 *
 *  In-place "karaoke" processing: for every sample the halved difference
 *  d = (buf[0][n] >> 1) - (buf[1][n] >> 1) is computed; buf[0][n] receives d
 *  and buf[1][n] receives -d.
 *
 *  In:       r0 = count, r1 = buf (array of the two channel pointers)
 *  Out:      nothing
 *  Scratch:  r0-r3 are modified; r4-r6 and lr are pushed on entry and
 *            restored on return (lr is used as a data register in the loop)
 *
 *  NOTE: The following code processes two samples at once. When count is odd,
 *        there is an additional obsolete sample processed, which will not be
 *        used by the calling functions. The counter is only tested at the
 *        bottom of the loop, so two samples per channel are always touched,
 *        even for count <= 0.
 */
    .section .icode, "ax", %progbits
    .align  2
    .global channels_process_sound_chan_karaoke
    .type   channels_process_sound_chan_karaoke, %function
channels_process_sound_chan_karaoke:
    @ input: r0 = count, r1 = buf
    stmfd   sp!, {r4-r6, lr}
    ldmia   r1, {r2-r3}             @ r2 = buf[0], r3 = buf[1]

.karaokeloop:
    ldmia   r2, {r4-r5}             @ r4, r5 = next two samples of buf[0]
    ldmia   r3, {r6, lr}            @ r6, lr = next two samples of buf[1]
    mov     r6, r6, asr #1          @ r6 = r6/2
    rsb     r4, r6, r4, asr #1      @ r4 = -r6 + r4/2 = (buf[0]-buf[1])/2
    rsb     r6, r4, #0              @ r6 = -r4
    mov     lr, lr, asr #1          @ lr = lr/2
    rsb     r5, lr, r5, asr #1      @ r5 = -lr + r5/2 = (buf[0]-buf[1])/2
    rsb     lr, r5, #0              @ lr = -r5
    stmia   r2!, {r4-r5}            @ differences go to buf[0]
    stmia   r3!, {r6, lr}           @ negated differences go to buf[1]
    subs    r0, r0, #2              @ two samples done
    bgt     .karaokeloop

    ldmfd   sp!, {r4-r6, pc}
.karaokeend:
    .size   channels_process_sound_chan_karaoke,.karaokeend-channels_process_sound_chan_karaoke

/****************************************************************************
 *  void sample_output_mono(int count, struct dsp_data *data,
 *                          int32_t *src[], int16_t *dst)
 *
 *  Converts the 32-bit samples of src[0] to 16 bits and writes every result
 *  twice, filling both halfwords of one output word in dst.
 *  Per sample: out = clip16((in + (1 << (scale - 1))) >> scale), where scale
 *  is the first word of *data (data->output_scale).
 *
 *  In:       r0 = count, r1 = data, r2 = src, r3 = dst
 *  Out:      nothing
 *  Scratch:  r0-r3 are modified; r4-r8 and lr are pushed on entry and
 *            restored on return (lr is used as a temporary in the loop)
 *
 *  dst is written with word stores (stmia), two words per iteration.
 *
 *  NOTE: The following code processes two samples at once. When count is odd,
 *        there is an additional obsolete sample processed, which will not be
 *        used by the calling functions. The counter is only tested at the
 *        bottom of the loop, so two samples are always processed.
 */
    .section .icode, "ax", %progbits
    .align  2
    .global sample_output_mono
    .type   sample_output_mono, %function
sample_output_mono:
    @ input: r0 = count, r1 = data, r2 = src, r3 = dst
    stmfd   sp!, {r4-r8, lr}        @ r9 is never used here, no need to save

    ldr     r4, [r2]                @ r4 = src[0]
    ldr     r5, [r1]                @ r5 = data->output_scale
    sub     r1, r5, #1              @ r1 = r5-1
    mov     r2, #1
    mov     r2, r2, asl r1          @ r2 = 1<<r1 = 1 << (scale-1)
    mvn     r1, #0x8000             @ r1 = 0xffff7fff, needed for clipping
    mov     r8, #0xff00
    orr     r8, r8, #0xff           @ r8 = 0xffff, needed for masking

.somloop:
    ldmia   r4!, {r6-r7}            @ fetch two input samples
    add     r6, r6, r2
    mov     r6, r6, asr r5          @ r6 = (r6 + 1<<(scale-1)) >> scale
    mov     lr, r6, asr #15         @ lr is 0 or -1 iff r6 fits in 16 bits
    teq     lr, lr, asr #31
    eorne   r6, r1, lr, asr #31     @ Clip (-32768...+32767)
    add     r7, r7, r2
    mov     r7, r7, asr r5          @ r7 = (r7 + 1<<(scale-1)) >> scale
    mov     lr, r7, asr #15
    teq     lr, lr, asr #31
    eorne   r7, r1, lr, asr #31     @ Clip (-32768...+32767)

    and     r6, r6, r8              @ drop sign extension / clip helper bits
    orr     r6, r6, r6, asl #16     @ pack first 2 halfwords into 1 word
    and     r7, r7, r8
    orr     r7, r7, r7, asl #16     @ pack last 2 halfwords into 1 word
    stmia   r3!, {r6-r7}

    subs    r0, r0, #2
    bgt     .somloop

    ldmfd   sp!, {r4-r8, pc}
.somend:
    .size   sample_output_mono,.somend-sample_output_mono

/****************************************************************************
 *  void sample_output_stereo(int count, struct dsp_data *data,
 *                            int32_t *src[], int16_t *dst)
 *
 *  Converts the 32-bit samples of src[0] and src[1] to 16 bits and
 *  interleaves them: each output word holds the src[0] sample in its low
 *  halfword and the matching src[1] sample in its high halfword.
 *  Per sample: out = clip16((in + (1 << (scale - 1))) >> scale), where scale
 *  is the first word of *data (data->output_scale).
 *
 *  In:       r0 = count, r1 = data, r2 = src, r3 = dst
 *  Out:      nothing
 *  Scratch:  r0-r3 are modified; r4-r11 and lr are pushed on entry and
 *            restored on return (lr is used as a temporary in the loop)
 *
 *  dst is written with word stores (stmia), two words per iteration.
 *
 *  NOTE: The following code processes two samples at once. When count is odd,
 *        there is an additional obsolete sample processed, which will not be
 *        used by the calling functions. The counter is only tested at the
 *        bottom of the loop, so two samples per channel are always processed.
 */
    .section .icode, "ax", %progbits
    .align  2
    .global sample_output_stereo
    .type   sample_output_stereo, %function
sample_output_stereo:
    @ input: r0 = count, r1 = data, r2 = src, r3 = dst
    stmfd   sp!, {r4-r11, lr}

    ldmia   r2, {r4-r5}             @ r4 = src[0], r5 = src[1]
    ldr     r6, [r1]                @ r6 = data->output_scale
    sub     r1, r6, #1              @ r1 = r6-1
    mov     r2, #1
    mov     r2, r2, asl r1          @ r2 = 1<<r1 = 1 << (scale-1)
    mvn     r1, #0x8000             @ r1 = 0xffff7fff, needed for clipping
    mov     r11, #0xff00
    orr     r11, r11, #0xff         @ r11 = 0xffff, needed for masking

.sosloop:
    ldmia   r4!, {r7-r8}            @ two samples from src[0]
    add     r7, r7, r2
    mov     r7, r7, asr r6          @ r7 = (r7 + 1<<(scale-1)) >> scale
    mov     lr, r7, asr #15         @ lr is 0 or -1 iff r7 fits in 16 bits
    teq     lr, lr, asr #31
    eorne   r7, r1, lr, asr #31     @ Clip (-32768...+32767)
    add     r8, r8, r2
    mov     r8, r8, asr r6          @ r8 = (r8 + 1<<(scale-1)) >> scale
    mov     lr, r8, asr #15
    teq     lr, lr, asr #31
    eorne   r8, r1, lr, asr #31     @ Clip (-32768...+32767)

    ldmia   r5!, {r9-r10}           @ two samples from src[1]
    add     r9, r9, r2
    mov     r9, r9, asr r6          @ r9 = (r9 + 1<<(scale-1)) >> scale
    mov     lr, r9, asr #15
    teq     lr, lr, asr #31
    eorne   r9, r1, lr, asr #31     @ Clip (-32768...+32767)
    add     r10, r10, r2
    mov     r10, r10, asr r6        @ r10 = (r10 + 1<<(scale-1)) >> scale
    mov     lr, r10, asr #15
    teq     lr, lr, asr #31
    eorne   r10, r1, lr, asr #31    @ Clip (-32768...+32767)

    and     r7, r7, r11             @ only the src[0] halves need masking,
    orr     r9, r7, r9, asl #16     @ pack first 2 halfwords into 1 word
    and     r8, r8, r11             @ the shift discards src[1]'s upper bits
    orr     r10, r8, r10, asl #16   @ pack last 2 halfwords into 1 word
    stmia   r3!, {r9-r10}

    subs    r0, r0, #2
    bgt     .sosloop

    ldmfd   sp!, {r4-r11, pc}
.sosend:
    .size   sample_output_stereo,.sosend-sample_output_stereo

/****************************************************************************
 *  void apply_crossfeed(int count, int32_t* src[])
 *
 *  Runs the crossfeed filter in place over src[0] (left) and src[1] (right),
 *  one sample per channel per iteration. Every output is the direct-gain
 *  scaled input plus a first-order filtered, delayed sample taken from the
 *  opposite channel's delay line.
 *
 *  Layout of the external crossfeed_data block as accessed here (words):
 *     0      direct gain
 *     1..3   b0, b1, a1 filter coefficients
 *     4..7   filter history (dr[n-1], y_l[n-1], dl[n-1], y_r[n-1])
 *     8..33  delay line, 13 interleaved left/right pairs
 *     34     pointer to the current delay line position
 *
 *  In:       r0 = count, r1 = src
 *  Out:      nothing; filter history and delay position are written back
 *  Scratch:  r0-r3 and r12 are modified; r4-r11 and lr are pushed on entry
 *            and restored on return. Two extra stack words hold the
 *            remaining count and the delay line end address.
 *
 *  NOTE: The counter is decremented and tested for zero only at the bottom
 *        of the loop, so count must be at least 1; a count of 0 would wrap
 *        around instead of returning immediately.
 */
    .section .text
    .align  2
    .global apply_crossfeed
    .type   apply_crossfeed, %function
apply_crossfeed:
    @ unfortunately, we ended up in a bit of a register squeeze here, and need
    @ to keep the count on the stack :/
    stmdb   sp!, { r4-r11, lr }     @ stack modified regs
    ldmia   r1, { r2-r3 }           @ r2 = src[0], r3 = src[1]

    ldr     r1, =crossfeed_data
    ldmia   r1!, { r4-r11 }         @ load direct gain and filter data
    add     r12, r1, #13*4*2        @ calculate end of delay
    stmdb   sp!, { r0, r12 }        @ stack count and end of delay adr
    ldr     r0, [r1, #13*4*2]       @ fetch current delay line address

    /* Register usage in loop:
     * r0 = &delay[index][0], r1 = accumulator high, r2 = src[0], r3 = src[1],
     * r4 = direct gain, r5-r7 = b0, b1, a1 (filter coefs),
     * r8-r11 = filter history, r12 = temp, r14 = accumulator low
     */
.cfloop:
    smull   r14, r1, r6, r8         @ acc = b1*dr[n - 1]
    smlal   r14, r1, r7, r9         @ acc += a1*y_l[n - 1]
    ldr     r8, [r0, #4]            @ r8 = dr[n]
    smlal   r14, r1, r5, r8         @ acc += b0*dr[n]
    mov     r9, r1, lsl #1          @ fix format for filter history
    ldr     r12, [r2]               @ load left input
    smlal   r14, r1, r4, r12        @ acc += gain*x_l[n]
    mov     r1, r1, lsl #1          @ fix format
    str     r1, [r2], #4            @ save result

    smull   r14, r1, r6, r10        @ acc = b1*dl[n - 1]
    smlal   r14, r1, r7, r11        @ acc += a1*y_r[n - 1]
    ldr     r10, [r0]               @ r10 = dl[n]
    str     r12, [r0], #4           @ save left input to delay line
    smlal   r14, r1, r5, r10        @ acc += b0*dl[n]
    mov     r11, r1, lsl #1         @ fix format for filter history
    ldr     r12, [r3]               @ load right input
    smlal   r14, r1, r4, r12        @ acc += gain*x_r[n]
    str     r12, [r0], #4           @ save right input to delay line
    mov     r1, r1, lsl #1          @ fix format
    str     r1, [r3], #4            @ save result

    ldr     r12, [sp, #4]           @ fetch delay line end addr from stack
    cmp     r0, r12                 @ need to wrap to start of delay?
    subeq   r0, r0, #13*4*2         @ wrap back delay line ptr to start

    ldr     r1, [sp]                @ fetch count from stack
    subs    r1, r1, #1              @ are we finished?
    strne   r1, [sp]                @ nope, save count back to stack
    bne     .cfloop

    @ save data back to struct
    ldr     r12, =crossfeed_data + 4*4
    stmia   r12, { r8-r11 }         @ save filter history
    str     r0, [r12, #30*4]        @ save delay line index
    add     sp, sp, #8              @ remove temp variables from stack
    ldmia   sp!, { r4-r11, pc }
.cfend:
    .size   apply_crossfeed,.cfend-apply_crossfeed

/****************************************************************************
 *  int dsp_downsample(int count, struct dsp_data *data,
 *                     int32_t *src[], int32_t *dst[])
 *
 *  Linear-interpolating resampler for the case where the integer part of
 *  the 16.16 fixed point phase may advance by more than one input sample
 *  per output sample. For every channel:
 *     out = s[pos - 1] + frac * (s[pos] - s[pos - 1]),   phase += delta
 *  with pos = phase >> 16 and frac = phase & 0xffff; s[-1] is the
 *  last_sample value saved from the previous call.
 *
 *  Fields of *data as accessed here (byte offsets):
 *     +4   num_channels
 *     +8   resample_data.delta
 *     +12  resample_data.phase
 *     +16  resample_data.last_sample[], one word per channel
 *
 *  In:       r0 = count, r1 = data, r2 = src, r3 = dst
 *  Out:      r0 = number of samples written to dst[0]
 *  Scratch:  r1 and r12 are modified; r4-r11 and lr are pushed on entry
 *            and restored on return
 *
 *  Channels are processed from the highest index down to 0; each one
 *  restarts from the stored phase, and the phase reached by channel 0,
 *  reduced by count << 16, is stored back for the next frame.
 *
 *  NOTE: s[count - 1] is read unconditionally to refresh last_sample, so
 *        count must be at least 1.
 */
    .section .text
    .align  2
    .global dsp_downsample
    .type   dsp_downsample, %function
dsp_downsample:
    stmdb   sp!, { r4-r11, lr }     @ stack modified regs
    ldmib   r1, { r5-r6 }           @ r5 = num_channels,r6 = resample_data.delta
    sub     r5, r5, #1              @ pre-decrement num_channels for use
    add     r4, r1, #12             @ r4 = &resample_data.phase
    mov     r12, #0xff
    orr     r12, r12, #0xff00       @ r12 = 0xffff
.dschannel_loop:
    ldr     r1, [r4]                @ r1 = resample_data.phase
    ldr     r7, [r2, r5, lsl #2]    @ r7 = s = src[ch - 1]
    ldr     r8, [r3, r5, lsl #2]    @ r8 = d = dst[ch - 1]
    add     r9, r4, #4              @ r9 = &last_sample[0]
    ldr     r10, [r9, r5, lsl #2]   @ r10 = last_sample[ch - 1]
    sub     r11, r0, #1
    ldr     r14, [r7, r11, lsl #2]  @ load last sample in s[] ...
    str     r14, [r9, r5, lsl #2]   @ and write as next frame's last_sample
    movs    r9, r1, lsr #16         @ r9 = pos = phase >> 16
    ldreq   r11, [r7]               @ if pos = 0, load src[0] and jump into loop
    beq     .dsuse_last_start
    cmp     r9, r0                  @ if pos >= count, we're already done
    bge     .dsloop_skip

    @ Register usage in loop:
    @ r0 = count, r1 = phase, r4 = &resample_data.phase, r5 = cur_channel,
    @ r6 = delta, r7 = s, r8 = d, r9 = pos, r10 = s[pos - 1], r11 = s[pos]
.dsloop:
    add     r9, r7, r9, lsl #2      @ r9 = &s[pos]
    ldmda   r9, { r10, r11 }        @ r10 = s[pos - 1], r11 = s[pos]
.dsuse_last_start:
    sub     r11, r11, r10           @ r11 = diff = s[pos] - s[pos - 1]
    @ keep frac in lower bits to take advantage of multiplier early termination
    and     r9, r1, r12             @ frac = phase & 0xffff
    smull   r9, r14, r11, r9        @ r14:r9 = diff*frac (64-bit product)
    add     r10, r10, r14, lsl #16
    add     r10, r10, r9, lsr #16   @ r10 = out = s[pos - 1] + frac*diff
    str     r10, [r8], #4           @ *d++ = out
    add     r1, r1, r6              @ phase += delta
    mov     r9, r1, lsr #16         @ pos = phase >> 16
    cmp     r9, r0                  @ pos < count?
    blt     .dsloop                 @ yup, do more samples
.dsloop_skip:
    subs    r5, r5, #1
    bpl     .dschannel_loop         @ if (--ch) >= 0, do another channel
    sub     r1, r1, r0, lsl #16     @ wrap phase back to start
    str     r1, [r4]                @ store back
    ldr     r1, [r3]                @ r1 = &dst[0]
    sub     r8, r8, r1              @ dst - &dst[0]
    mov     r0, r8, lsr #2          @ convert bytes->samples
    ldmia   sp!, { r4-r11, pc }     @ ... and we're out
.dsend:
    .size   dsp_downsample,.dsend-dsp_downsample

/****************************************************************************
 *  int dsp_upsample(int count, struct dsp_data *dsp,
 *                   int32_t *src[], int32_t *dst[])
 *
 *  Linear-interpolating resampler for the case where several output samples
 *  may be produced per input sample. For every channel:
 *     out = s[pos - 1] + frac * (s[pos] - s[pos - 1]),   phase += delta
 *  s[-1] is the last_sample value saved from the previous call. Inside the
 *  loop the phase is kept rotated by 16 bits, with frac in the upper
 *  halfword, so that the carry out of "phase += delta << 16" signals that
 *  pos has advanced by one.
 *
 *  Fields of *dsp as accessed here (byte offsets):
 *     +4   num_channels
 *     +8   resample_data.delta
 *     +12  resample_data.phase
 *     +16  resample_data.last_sample[], one word per channel
 *
 *  In:       r0 = count, r1 = dsp, r2 = src, r3 = dst
 *  Out:      r0 = number of samples written to dst[0]
 *  Scratch:  r1 and r12 are modified; r4-r11 and lr are pushed on entry
 *            and restored on return. Two extra stack words hold count and
 *            &resample_data.phase, because r0 and r4 are reused as
 *            temporaries inside the inner loop.
 *
 *  Channels are processed from the highest index down to 0; each one
 *  restarts from the stored phase, and the phase reached by channel 0 is
 *  rotated back and stored for the next frame.
 *
 *  NOTE: s[count - 1] is read unconditionally to refresh last_sample, so
 *        count must be at least 1.
 */
    .section .text
    .align  2
    .global dsp_upsample
    .type   dsp_upsample, %function
dsp_upsample:
    stmdb   sp!, { r4-r11, lr }     @ stack modified regs
    ldmib   r1, { r5-r6 }           @ r5 = num_channels,r6 = resample_data.delta
    sub     r5, r5, #1              @ pre-decrement num_channels for use
    add     r4, r1, #12             @ r4 = &resample_data.phase
    stmdb   sp!, { r0, r4 }         @ stack count and &resample_data.phase
.uschannel_loop:
    ldr     r12, [r4]               @ r12 = resample_data.phase
    mov     r1, r12, ror #16        @ swap halfword positions, we'll use carry
                                    @ to detect pos increments
    ldr     r7, [r2, r5, lsl #2]    @ r7 = s = src[ch - 1]
    ldr     r8, [r3, r5, lsl #2]    @ r8 = d = dst[ch - 1]
    add     r9, r4, #4              @ r9 = &last_sample[0]
    ldr     r10, [r9, r5, lsl #2]   @ r10 = last_sample[ch - 1]
    sub     r11, r0, #1
    ldr     r14, [r7, r11, lsl #2]  @ load last sample in s[] ...
    str     r14, [r9, r5, lsl #2]   @ and write as next frame's last_sample
    add     r9, r7, r0, lsl #2      @ r9 = src_end = &src[count]
    movs    r14, r12, lsr #16       @ pos = resample_data.phase >> 16
    beq     .usstart_0              @ pos = 0
    cmp     r14, r0                 @ if pos >= count, we're already done
    bge     .usloop_skip
    add     r7, r7, r14, lsl #2     @ r7 = &s[pos]
    ldr     r10, [r7, #-4]          @ r10 = s[pos - 1]
    b       .usstart_0

    @ Register usage in loop:
    @ r0 = diff (count is reloaded from the stack after each channel),
    @ r1 = rotated phase, r4 = frac (temp), r5 = cur_channel, r6 = delta,
    @ r7 = s, r8 = d, r9 = src_end, r10 = s[pos - 1], r11 = s[pos]
.usloop_1:
    mov     r10, r11                @ r10 = previous sample
.usstart_0:
    ldr     r11, [r7], #4           @ r11 = next sample
    sub     r0, r11, r10            @ r0 = s[pos] - s[pos - 1]
.usloop_0:
    mov     r4, r1, lsr #16         @ r4 = frac = phase >> 16
    smull   r12, r14, r4, r0        @ r14:r12 = frac*diff (64-bit product)
    add     r14, r10, r14, lsl #16
    add     r14, r14, r12, lsr #16  @ r14 = out = s[pos - 1] + frac*diff
    str     r14, [r8], #4           @ *d++ = out
    adds    r1, r1, r6, lsl #16     @ phase += delta << 16
    bcc     .usloop_0               @ if carry is set, pos is incremented
    cmp     r7, r9                  @ if s < src_end, do another sample
    blo     .usloop_1
.usloop_skip:
    subs    r5, r5, #1
    ldmia   sp, { r0, r4 }          @ reload count and &resample_data.phase
    bpl     .uschannel_loop         @ if (--ch) >= 0, do another channel
    mov     r1, r1, ror #16         @ wrap phase back to start of next frame
    str     r1, [r4]                @ store back
    ldr     r1, [r3]                @ r1 = &dst[0]
    sub     r8, r8, r1              @ dst - &dst[0]
    mov     r0, r8, lsr #2          @ convert bytes->samples
    add     sp, sp, #8              @ adjust stack for temp variables
    ldmia   sp!, { r4-r11, pc }     @ ... and we're out
.usend:
    .size   dsp_upsample,.usend-dsp_upsample
