blob: 291d6ec2c8679887efaf5e5d85aef3b75f0f9bcd [file] [log] [blame]
/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 *
 * $Id$
 *
 * Copyright (C) 2006-2007 Adam Gashlin (hcs)
 * Copyright (C) 2004-2007 Shay Green (blargg)
 * Copyright (C) 2002 Brad Martin
 *
 * All files in this archive are subject to the GNU General Public License.
 * See the file COPYING in the source tree root for full license agreement.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/

/* The DSP portion (awe!) */

/* Dimensions used for the per-voice and raw register arrays below. */
enum { voice_count = 8 };       /* number of DSP voices */
enum { register_count = 128 };  /* number of byte-wide DSP registers */

/* Raw register image of one voice. The fields add up to 16 bytes, so an
   array of these overlays the register file one 16-byte row per voice. */
struct raw_voice_t
{
    int8_t  volume [2];
    uint8_t rate [2];
    uint8_t waveform;
    uint8_t adsr [2];   /* envelope rates for attack, decay, and sustain */
    uint8_t gain;       /* envelope gain (if not using ADSR) */
    int8_t  envx;       /* current envelope level */
    int8_t  outx;       /* current sample */
    int8_t  unused [6];
};

/* Global (non per-voice) registers, laid out so that every named field lands
   on its register address (given in hex in each comment). The unusedN
   arrays are padding that skips over the per-voice bytes of each row. */
struct globals_t
{
    int8_t  unused1 [12];
    int8_t  volume_0;       /* 0C   Main Volume Left (-.7) */
    int8_t  echo_feedback;  /* 0D   Echo Feedback (-.7) */
    int8_t  unused2 [14];
    int8_t  volume_1;       /* 1C   Main Volume Right (-.7) */
    int8_t  unused3 [15];
    int8_t  echo_volume_0;  /* 2C   Echo Volume Left (-.7) */
    uint8_t pitch_mods;     /* 2D   Pitch Modulation on/off for each voice */
    int8_t  unused4 [14];
    int8_t  echo_volume_1;  /* 3C   Echo Volume Right (-.7) */
    uint8_t noise_enables;  /* 3D   Noise output on/off for each voice */
    int8_t  unused5 [14];
    uint8_t key_ons;        /* 4C   Key On for each voice */
    uint8_t echo_ons;       /* 4D   Echo on/off for each voice */
    int8_t  unused6 [14];
    uint8_t key_offs;       /* 5C   key off for each voice
                                    (instantiates release mode) */
    uint8_t wave_page;      /* 5D   source directory (wave table offsets) */
    int8_t  unused7 [14];
    uint8_t flags;          /* 6C   flags and noise freq */
    uint8_t echo_page;      /* 6D */
    int8_t  unused8 [14];
    uint8_t wave_ended;     /* 7C */
    uint8_t echo_delay;     /* 7D   ms >> 4 */
    char    unused9 [2];
};

/* Envelope generator states. The numeric values are significant: code
   tests the mode against zero and by sign. */
enum state_t {  /* -1, 0, +1 allows more efficient if statements */
    state_decay   = -1,
    state_sustain = 0,
    state_attack  = +1,
    state_release = 2
};

/* Describes one pre-decoded waveform held in the BRR cache. */
struct cache_entry_t
{
    int16_t const* samples; /* first decoded sample of this waveform */
    unsigned end;           /* past-the-end position */
    unsigned loop;          /* number of samples in loop */
    unsigned start_addr;    /* address the waveform was decoded from; used
                               as the lookup key */
};

/* Number of samples produced by one BRR block. */
enum { brr_block_size = 16 };

/* Run-time state of one voice (the emulator's working copy, as opposed to
   the raw register image in struct raw_voice_t). */
struct voice_t
{
#if SPC_BRRCACHE
    /* waveform was pre-decoded: these are copied from its cache entry */
    int16_t const* samples;
    long wave_end;
    int wave_loop;
#else
    /* waveform is decoded one block at a time into this buffer */
    int16_t samples [3 + brr_block_size + 1];
    int block_header; /* header byte from current block */
#endif
    uint8_t const* addr;
    short volume [2];
    long position;/* position in samples buffer, with 12-bit fraction */
    short envx;
    short env_mode;
    short env_timer;
    short key_on_delay;
};

#if SPC_BRRCACHE
/* Storage for pre-decoded waveforms.
   a little extra for samples that go past end */
static int16_t BRRcache [0x20000 + 32];
#endif

/* Number of entries in one half of the echo FIR history buffer. */
enum { fir_buf_half = 8 };

#ifdef CPU_COLDFIRE
/* global because of the large alignment requirement for hardware masking -
 * L-R interleaved 16-bit samples for easy loading and mac.w use.
 *
 * The buffer is aligned to twice its size, so the address bit with value
 * fir_buf_size is always clear inside it. fir_buf_mask clears exactly that
 * bit.
 */
enum
{
    fir_buf_size = fir_buf_half * sizeof ( int32_t ),
    fir_buf_mask = ~fir_buf_size
};
int32_t fir_buf[fir_buf_half]
    __attribute__ ((aligned (fir_buf_size*2))) IBSS_ATTR;
#endif /* CPU_COLDFIRE */

Adam Gashlinb73960d2007-02-14 03:34:55 +0000125struct Spc_Dsp
126{
127 union
128 {
129 struct raw_voice_t voice [voice_count];
130 uint8_t reg [register_count];
131 struct globals_t g;
132 int16_t align;
133 } r;
134
135 unsigned echo_pos;
136 int keys_down;
137 int noise_count;
138 uint16_t noise; /* also read as int16_t */
139
Michael Sevakisd31162a2007-02-20 10:27:39 +0000140#ifdef CPU_COLDFIRE
141 /* circularly hardware masked address */
142 int32_t *fir_ptr;
143 /* wrapped address just behind current position -
144 allows mac.w to increment and mask fir_ptr */
145 int32_t *last_fir_ptr;
146 /* copy of echo FIR constants as int16_t for use with mac.w */
147 int16_t fir_coeff[voice_count];
148#else
Adam Gashlinb73960d2007-02-14 03:34:55 +0000149 /* fir_buf [i + 8] == fir_buf [i], to avoid wrap checking in FIR code */
150 int fir_pos; /* (0 to 7) */
151 int fir_buf [fir_buf_half * 2] [2];
152 /* copy of echo FIR constants as int, for faster access */
153 int fir_coeff [voice_count];
Michael Sevakisd31162a2007-02-20 10:27:39 +0000154#endif
Adam Gashlinb73960d2007-02-14 03:34:55 +0000155
156 struct voice_t voice_state [voice_count];
157
158#if SPC_BRRCACHE
159 uint8_t oldsize;
160 struct cache_entry_t wave_entry [256];
161 struct cache_entry_t wave_entry_old [256];
162#endif
163};
164
/* One entry of the sample source directory as it is laid out in emulated
   RAM. Both fields are raw 2-byte values read with GET_LE16A(). */
struct src_dir
{
    char start [2]; /* address of the waveform's first block */
    char loop [2];  /* address of the block playback loops back to */
};

171static void DSP_reset( struct Spc_Dsp* this )
172{
173 this->keys_down = 0;
174 this->echo_pos = 0;
175 this->noise_count = 0;
176 this->noise = 2;
Adam Gashlinb73960d2007-02-14 03:34:55 +0000177
178 this->r.g.flags = 0xE0; /* reset, mute, echo off */
179 this->r.g.key_ons = 0;
180
181 memset( this->voice_state, 0, sizeof this->voice_state );
182
183 int i;
184 for ( i = voice_count; --i >= 0; )
185 {
186 struct voice_t* v = this->voice_state + i;
187 v->env_mode = state_release;
188 v->addr = ram.ram;
189 }
190
191 #if SPC_BRRCACHE
192 this->oldsize = 0;
193 for ( i = 0; i < 256; i++ )
194 this->wave_entry [i].start_addr = -1;
195 #endif
Michael Sevakisd31162a2007-02-20 10:27:39 +0000196
197#ifdef CPU_COLDFIRE
198 this->fir_ptr = fir_buf;
199 this->last_fir_ptr = &fir_buf [7];
200 memset( fir_buf, 0, sizeof fir_buf );
201#else
202 this->fir_pos = 0;
Adam Gashlinb73960d2007-02-14 03:34:55 +0000203 memset( this->fir_buf, 0, sizeof this->fir_buf );
Michael Sevakisd31162a2007-02-20 10:27:39 +0000204#endif
205
Adam Gashlinb73960d2007-02-14 03:34:55 +0000206 assert( offsetof (struct globals_t,unused9 [2]) == register_count );
207 assert( sizeof (this->r.voice) == register_count );
208}
209
210static void DSP_write( struct Spc_Dsp* this, int i, int data ) ICODE_ATTR;
211static void DSP_write( struct Spc_Dsp* this, int i, int data )
212{
213 assert( (unsigned) i < register_count );
214
215 this->r.reg [i] = data;
216 int high = i >> 4;
217 int low = i & 0x0F;
218 if ( low < 2 ) /* voice volumes */
219 {
220 int left = *(int8_t const*) &this->r.reg [i & ~1];
221 int right = *(int8_t const*) &this->r.reg [i | 1];
222 struct voice_t* v = this->voice_state + high;
223 v->volume [0] = left;
224 v->volume [1] = right;
225 }
226 else if ( low == 0x0F ) /* fir coefficients */
227 {
228 this->fir_coeff [7 - high] = (int8_t) data; /* sign-extend */
229 }
230}
231
232static inline int DSP_read( struct Spc_Dsp* this, int i )
233{
234 assert( (unsigned) i < register_count );
235 return this->r.reg [i];
236}
237
/* Saturate n to the signed 16-bit range, storing the result in out only
   when n is out of range:
     if ( n < -32768 ) out = -32768;
     if ( n >  32767 ) out =  32767;
   out is left untouched when n already fits.

   n is an int expression and is evaluated more than once, so it must not
   have side effects. Both arguments are parenthesized so that compound
   expressions such as CLAMP16( a + b, x ) are handled correctly. */
#define CLAMP16( n, out )\
{\
    if ( (int16_t) (n) != (n) )\
        (out) = 0x7FFF ^ ((n) >> 31);\
}

#if SPC_BRRCACHE
/* Pre-decode the BRR waveform that starts at start_addr in emulated RAM and
   fill in this->wave_entry [raw_voice->waveform] with the result.

   If a waveform with the same start address has already been decoded (it is
   found in wave_entry_old), its entry is simply copied. Otherwise the blocks
   are decoded into BRRcache at offset start_addr * 2 until a block with the
   end flag is reached or the address passes the end of the 64K RAM, and the
   new entry is appended to wave_entry_old.

   voice->addr is left pointing just past the last block read. */
static void decode_brr( struct Spc_Dsp* this, unsigned start_addr,
                        struct voice_t* voice,
                        struct raw_voice_t const* const raw_voice ) ICODE_ATTR;
static void decode_brr( struct Spc_Dsp* this, unsigned start_addr,
                        struct voice_t* voice,
                        struct raw_voice_t const* const raw_voice )
{
    /* setup same variables as where decode_brr() is called from */
    #undef RAM
    #define RAM ram.ram
    struct src_dir const* const sd =
        (struct src_dir*) &RAM [this->r.g.wave_page * 0x100];
    struct cache_entry_t* const wave_entry =
        &this->wave_entry [raw_voice->waveform];

    /* the following block can be put in place of the call to
       decode_brr() below
     */
    {
        DEBUGF( "decode at %08x (wave #%d)\n",
                start_addr, raw_voice->waveform );

        /* see if in cache */
        int i;
        for ( i = 0; i < this->oldsize; i++ )
        {
            struct cache_entry_t* e = &this->wave_entry_old [i];
            if ( e->start_addr == start_addr )
            {
                DEBUGF( "found in wave_entry_old (oldsize=%d)\n",
                        this->oldsize );
                *wave_entry = *e;
                goto wave_in_cache;
            }
        }

        wave_entry->start_addr = start_addr;

        uint8_t const* const loop_ptr =
            RAM + GET_LE16A( sd [raw_voice->waveform].loop );
        short* loop_start = 0;

        short* out = BRRcache + start_addr * 2;
        wave_entry->samples = out;
        *out++ = 0;
        int smp1 = 0;
        int smp2 = 0;

        uint8_t const* addr = RAM + start_addr;
        int block_header;
        do
        {
            if ( addr == loop_ptr )
            {
                loop_start = out;
                /* pointer difference is cast to match the %x conversion */
                DEBUGF( "loop at %08x (wave #%d)\n",
                        (unsigned) (addr - RAM), raw_voice->waveform );
            }

            /* header */
            block_header = *addr;
            addr += 9;
            voice->addr = addr;
            int const filter = (block_header & 0x0C) - 0x08;

            /* scaling
               (invalid scaling gives -4096 for neg nybble, 0 for pos) */
            static unsigned char const right_shifts [16] = {
                5,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 29, 29, 29,
            };
            static unsigned char const left_shifts [16] = {
                0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11, 11, 11
            };
            int const scale = block_header >> 4;
            int const right_shift = right_shifts [scale];
            int const left_shift  = left_shifts  [scale];

            /* output position */
            out += brr_block_size;
            int offset = -brr_block_size << 2;

            do /* decode and filter 16 samples */
            {
                /* Get nybble, sign-extend, then scale
                   get byte, select which nybble, sign-extend, then shift based
                   on scaling. also handles invalid scaling values. */
                int delta = (int) (int8_t) (addr [offset >> 3] << (offset & 4))
                        >> right_shift << left_shift;

                out [offset >> 2] = smp2;

                if ( filter == 0 ) /* mode 0x08 (30-90% of the time) */
                {
                    delta -= smp2 >> 1;
                    delta += smp2 >> 5;
                    smp2 = smp1;
                    delta += smp1;
                    delta += (-smp1 - (smp1 >> 1)) >> 5;
                }
                else
                {
                    if ( filter == -4 ) /* mode 0x04 */
                    {
                        delta += smp1 >> 1;
                        delta += (-smp1) >> 5;
                    }
                    else if ( filter > -4 ) /* mode 0x0C */
                    {
                        delta -= smp2 >> 1;
                        delta += (smp2 + (smp2 >> 1)) >> 4;
                        delta += smp1;
                        delta += (-smp1 * 13) >> 7;
                    }
                    smp2 = smp1;
                }

                CLAMP16( delta, delta );
                smp1 = (int16_t) (delta * 2); /* sign-extend */
            }
            while ( (offset += 4) != 0 );

            /* if next block has end flag set, this block ends early */
            /* (verified) */
            if ( (block_header & 3) != 3 && (*addr & 3) == 1 )
            {
                /* skip last 9 samples */
                out -= 9;
                goto early_end;
            }
        }
        while ( !(block_header & 1) && addr < RAM + 0x10000 );

        out [0] = smp2;
        out [1] = smp1;

    early_end:
        /* end position carries a 12-bit fraction, like voice positions */
        wave_entry->end = (out - 1 - wave_entry->samples) << 12;

        wave_entry->loop = 0;
        if ( (block_header & 2) )
        {
            if ( loop_start )
            {
                int loop = out - loop_start;
                wave_entry->loop = loop;
                wave_entry->end += 0x3000;
                /* repeat the first samples of the loop after the end */
                out [2] = loop_start [2];
                out [3] = loop_start [3];
                out [4] = loop_start [4];
            }
            else
            {
                DEBUGF( "loop point outside initial wave\n" );
            }
        }

        /* pointer difference is cast to match the %x conversion */
        DEBUGF( "end at %08x (wave #%d)\n",
                (unsigned) (addr - RAM), raw_voice->waveform );

        /* add to cache */
        this->wave_entry_old [this->oldsize++] = *wave_entry;
wave_in_cache:;
    }
}
#endif

411static void key_on(struct Spc_Dsp* const this, struct voice_t* const voice,
412 struct src_dir const* const sd,
413 struct raw_voice_t const* const raw_voice,
414 const int key_on_delay, const int vbit) ICODE_ATTR;
415static void key_on(struct Spc_Dsp* const this, struct voice_t* const voice,
416 struct src_dir const* const sd,
417 struct raw_voice_t const* const raw_voice,
418 const int key_on_delay, const int vbit) {
419 #undef RAM
420 #define RAM ram.ram
421 int const env_rate_init = 0x7800;
422 voice->key_on_delay = key_on_delay;
423 if ( key_on_delay == 0 )
424 {
425 this->keys_down |= vbit;
426 voice->envx = 0;
427 voice->env_mode = state_attack;
428 voice->env_timer = env_rate_init; /* TODO: inaccurate? */
Michael Sevakisd31162a2007-02-20 10:27:39 +0000429 unsigned start_addr = GET_LE16A(sd [raw_voice->waveform].start);
Adam Gashlinb73960d2007-02-14 03:34:55 +0000430 #if !SPC_BRRCACHE
431 {
432 voice->addr = RAM + start_addr;
433 /* BRR filter uses previous samples */
434 voice->samples [brr_block_size + 1] = 0;
435 voice->samples [brr_block_size + 2] = 0;
436 /* decode three samples immediately */
437 voice->position = (brr_block_size + 3) * 0x1000 - 1;
438 voice->block_header = 0; /* "previous" BRR header */
439 }
440 #else
441 {
442 voice->position = 3 * 0x1000 - 1;
443 struct cache_entry_t* const wave_entry =
444 &this->wave_entry [raw_voice->waveform];
445
446 /* predecode BRR if not already */
447 if ( wave_entry->start_addr != start_addr )
448 {
449 /* the following line can be replaced by the indicated block
450 in decode_brr() */
451 decode_brr( this, start_addr, voice, raw_voice );
452 }
453
454 voice->samples = wave_entry->samples;
455 voice->wave_end = wave_entry->end;
456 voice->wave_loop = wave_entry->loop;
457 }
458 #endif
459 }
460}
461
462static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
463 ICODE_ATTR;
464static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
465{
466 #undef RAM
467#ifdef CPU_ARM
468 uint8_t* const ram_ = ram.ram;
469 #define RAM ram_
470#else
471 #define RAM ram.ram
472#endif
473#if 0
474 EXIT_TIMER(cpu);
475 ENTER_TIMER(dsp);
476#endif
Michael Sevakisd31162a2007-02-20 10:27:39 +0000477
Adam Gashlinb73960d2007-02-14 03:34:55 +0000478 /* Here we check for keys on/off. Docs say that successive writes
479 to KON/KOF must be separated by at least 2 Ts periods or risk
480 being neglected. Therefore DSP only looks at these during an
481 update, and not at the time of the write. Only need to do this
482 once however, since the regs haven't changed over the whole
483 period we need to catch up with. */
484
485 {
486 int key_ons = this->r.g.key_ons;
487 int key_offs = this->r.g.key_offs;
488 /* keying on a voice resets that bit in ENDX */
489 this->r.g.wave_ended &= ~key_ons;
490 /* key_off bits prevent key_on from being acknowledged */
491 this->r.g.key_ons = key_ons & key_offs;
492
493 /* process key events outside loop, since they won't re-occur */
494 struct voice_t* voice = this->voice_state + 8;
495 int vbit = 0x80;
496 do
497 {
498 --voice;
499 if ( key_offs & vbit )
500 {
501 voice->env_mode = state_release;
502 voice->key_on_delay = 0;
503 }
504 else if ( key_ons & vbit )
505 {
506 voice->key_on_delay = 8;
507 }
508 }
509 while ( (vbit >>= 1) != 0 );
510 }
511
512 struct src_dir const* const sd =
513 (struct src_dir*) &RAM [this->r.g.wave_page * 0x100];
Michael Sevakisd31162a2007-02-20 10:27:39 +0000514
515 #ifdef ROCKBOX_BIG_ENDIAN
516 /* Convert endiannesses before entering loops - these
517 get used alot */
518 const uint32_t rates[voice_count] =
519 {
520 GET_LE16A( this->r.voice[0].rate ) & 0x3FFF,
521 GET_LE16A( this->r.voice[1].rate ) & 0x3FFF,
522 GET_LE16A( this->r.voice[2].rate ) & 0x3FFF,
523 GET_LE16A( this->r.voice[3].rate ) & 0x3FFF,
524 GET_LE16A( this->r.voice[4].rate ) & 0x3FFF,
525 GET_LE16A( this->r.voice[5].rate ) & 0x3FFF,
526 GET_LE16A( this->r.voice[6].rate ) & 0x3FFF,
527 GET_LE16A( this->r.voice[7].rate ) & 0x3FFF,
528 };
529 #define VOICE_RATE(x) *(x)
530 #define IF_RBE(...) __VA_ARGS__
531 #ifdef CPU_COLDFIRE
532 /* Initialize mask register with the buffer address mask */
Michael Sevakisc2925812007-02-23 22:39:12 +0000533 asm volatile ("move.l %[m], %%mask" : : [m]"i"(fir_buf_mask));
Michael Sevakis46bb37a2007-02-20 13:06:11 +0000534 const int echo_wrap = (this->r.g.echo_delay & 15) * 0x800;
535 const int echo_start = this->r.g.echo_page * 0x100;
Michael Sevakisd31162a2007-02-20 10:27:39 +0000536 #endif /* CPU_COLDFIRE */
537 #else
538 #define VOICE_RATE(x) (INT16A(raw_voice->rate) & 0x3FFF)
539 #define IF_RBE(...)
540 #endif /* ROCKBOX_BIG_ENDIAN */
Adam Gashlinb73960d2007-02-14 03:34:55 +0000541
542#if !SPC_NOINTERP
543 int const slow_gaussian = (this->r.g.pitch_mods >> 1) |
544 this->r.g.noise_enables;
545#endif
546 /* (g.flags & 0x40) ? 30 : 14 */
Michael Sevakisd31162a2007-02-20 10:27:39 +0000547 int const global_muting = ((this->r.g.flags & 0x40) >> 2) + 14 - 8;
548 int const global_vol_0 = this->r.g.volume_0;
549 int const global_vol_1 = this->r.g.volume_1;
Adam Gashlinb73960d2007-02-14 03:34:55 +0000550
551 /* each rate divides exactly into 0x7800 without remainder */
552 int const env_rate_init = 0x7800;
553 static unsigned short const env_rates [0x20] ICONST_ATTR =
554 {
555 0x0000, 0x000F, 0x0014, 0x0018, 0x001E, 0x0028, 0x0030, 0x003C,
556 0x0050, 0x0060, 0x0078, 0x00A0, 0x00C0, 0x00F0, 0x0140, 0x0180,
557 0x01E0, 0x0280, 0x0300, 0x03C0, 0x0500, 0x0600, 0x0780, 0x0A00,
558 0x0C00, 0x0F00, 0x1400, 0x1800, 0x1E00, 0x2800, 0x3C00, 0x7800
559 };
560
561 do /* one pair of output samples per iteration */
562 {
563 /* Noise */
564 if ( this->r.g.noise_enables )
565 {
566 if ( (this->noise_count -=
567 env_rates [this->r.g.flags & 0x1F]) <= 0 )
568 {
569 this->noise_count = env_rate_init;
570 int feedback = (this->noise << 13) ^ (this->noise << 14);
571 this->noise = (feedback & 0x8000) ^ (this->noise >> 1 & ~1);
572 }
573 }
574
575#if !SPC_NOECHO
576 int echo_0 = 0;
577 int echo_1 = 0;
578#endif
579 long prev_outx = 0; /* TODO: correct value for first channel? */
580 int chans_0 = 0;
581 int chans_1 = 0;
582 /* TODO: put raw_voice pointer in voice_t? */
583 struct raw_voice_t * raw_voice = this->r.voice;
584 struct voice_t* voice = this->voice_state;
585 int vbit = 1;
Michael Sevakisd31162a2007-02-20 10:27:39 +0000586 IF_RBE( const uint32_t* vr = rates; )
587 for ( ; vbit < 0x100; vbit <<= 1, ++voice, ++raw_voice IF_RBE( , ++vr ) )
Adam Gashlinb73960d2007-02-14 03:34:55 +0000588 {
589 /* pregen involves checking keyon, etc */
590#if 0
591 ENTER_TIMER(dsp_pregen);
592#endif
593
594 /* Key on events are delayed */
595 int key_on_delay = voice->key_on_delay;
596
597 if ( --key_on_delay >= 0 ) /* <1% of the time */
598 {
599 key_on(this,voice,sd,raw_voice,key_on_delay,vbit);
600 }
601
602 if ( !(this->keys_down & vbit) ) /* Silent channel */
603 {
604 silent_chan:
605 raw_voice->envx = 0;
606 raw_voice->outx = 0;
607 prev_outx = 0;
608 continue;
609 }
610
611 /* Envelope */
612 {
613 int const env_range = 0x800;
614 int env_mode = voice->env_mode;
615 int adsr0 = raw_voice->adsr [0];
616 int env_timer;
617 if ( env_mode != state_release ) /* 99% of the time */
618 {
619 env_timer = voice->env_timer;
620 if ( adsr0 & 0x80 ) /* 79% of the time */
621 {
622 int adsr1 = raw_voice->adsr [1];
623 if ( env_mode == state_sustain ) /* 74% of the time */
624 {
625 if ( (env_timer -= env_rates [adsr1 & 0x1F]) > 0 )
626 goto write_env_timer;
627
628 int envx = voice->envx;
629 envx--; /* envx *= 255 / 256 */
630 envx -= envx >> 8;
631 voice->envx = envx;
632 /* TODO: should this be 8? */
633 raw_voice->envx = envx >> 4;
634 goto init_env_timer;
635 }
636 else if ( env_mode < 0 ) /* 25% state_decay */
637 {
638 int envx = voice->envx;
639 if ( (env_timer -=
640 env_rates [(adsr0 >> 3 & 0x0E) + 0x10]) <= 0 )
641 {
642 envx--; /* envx *= 255 / 256 */
643 envx -= envx >> 8;
644 voice->envx = envx;
645 /* TODO: should this be 8? */
646 raw_voice->envx = envx >> 4;
647 env_timer = env_rate_init;
648 }
649
650 int sustain_level = adsr1 >> 5;
651 if ( envx <= (sustain_level + 1) * 0x100 )
652 voice->env_mode = state_sustain;
653
654 goto write_env_timer;
655 }
656 else /* state_attack */
657 {
658 int t = adsr0 & 0x0F;
659 if ( (env_timer -= env_rates [t * 2 + 1]) > 0 )
660 goto write_env_timer;
661
662 int envx = voice->envx;
663
664 int const step = env_range / 64;
665 envx += step;
666 if ( t == 15 )
667 envx += env_range / 2 - step;
668
669 if ( envx >= env_range )
670 {
671 envx = env_range - 1;
672 voice->env_mode = state_decay;
673 }
674 voice->envx = envx;
675 /* TODO: should this be 8? */
676 raw_voice->envx = envx >> 4;
677 goto init_env_timer;
678 }
679 }
680 else /* gain mode */
681 {
682 int t = raw_voice->gain;
683 if ( t < 0x80 )
684 {
685 raw_voice->envx = t;
686 voice->envx = t << 4;
687 goto env_end;
688 }
689 else
690 {
691 if ( (env_timer -= env_rates [t & 0x1F]) > 0 )
692 goto write_env_timer;
693
694 int envx = voice->envx;
695 int mode = t >> 5;
696 if ( mode <= 5 ) /* decay */
697 {
698 int step = env_range / 64;
699 if ( mode == 5 ) /* exponential */
700 {
701 envx--; /* envx *= 255 / 256 */
702 step = envx >> 8;
703 }
704 if ( (envx -= step) < 0 )
705 {
706 envx = 0;
707 if ( voice->env_mode == state_attack )
708 voice->env_mode = state_decay;
709 }
710 }
711 else /* attack */
712 {
713 int const step = env_range / 64;
714 envx += step;
715 if ( mode == 7 &&
716 envx >= env_range * 3 / 4 + step )
717 envx += env_range / 256 - step;
718
719 if ( envx >= env_range )
720 envx = env_range - 1;
721 }
722 voice->envx = envx;
723 /* TODO: should this be 8? */
724 raw_voice->envx = envx >> 4;
725 goto init_env_timer;
726 }
727 }
728 }
729 else /* state_release */
730 {
731 int envx = voice->envx;
732 if ( (envx -= env_range / 256) > 0 )
733 {
734 voice->envx = envx;
735 raw_voice->envx = envx >> 8;
736 goto env_end;
737 }
738 else
739 {
740 /* bit was set, so this clears it */
741 this->keys_down ^= vbit;
742 voice->envx = 0;
743 goto silent_chan;
744 }
745 }
746 init_env_timer:
747 env_timer = env_rate_init;
748 write_env_timer:
749 voice->env_timer = env_timer;
750 env_end:;
751 }
752#if 0
753 EXIT_TIMER(dsp_pregen);
754
755 ENTER_TIMER(dsp_gen);
756#endif
757 #if !SPC_BRRCACHE
758 /* Decode BRR block */
759 if ( voice->position >= brr_block_size * 0x1000 )
760 {
761 voice->position -= brr_block_size * 0x1000;
762
763 uint8_t const* addr = voice->addr;
764 if ( addr >= RAM + 0x10000 )
765 addr -= 0x10000;
766
767 /* action based on previous block's header */
768 if ( voice->block_header & 1 )
769 {
770 addr = RAM + GET_LE16A( sd [raw_voice->waveform].loop );
771 this->r.g.wave_ended |= vbit;
772 if ( !(voice->block_header & 2) ) /* 1% of the time */
773 {
774 /* first block was end block;
775 don't play anything (verified) */
776 /* bit was set, so this clears it */
777 this->keys_down ^= vbit;
778
779 /* since voice->envx is 0,
780 samples and position don't matter */
781 raw_voice->envx = 0;
782 voice->envx = 0;
783 goto skip_decode;
784 }
785 }
786
787 /* header */
788 int const block_header = *addr;
789 addr += 9;
790 voice->addr = addr;
791 voice->block_header = block_header;
792 int const filter = (block_header & 0x0C) - 0x08;
793
794 /* scaling (invalid scaling gives -4096 for neg nybble,
795 0 for pos) */
796 static unsigned char const right_shifts [16] = {
797 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 29, 29, 29,
798 };
799 static unsigned char const left_shifts [16] = {
800 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11
801 };
802 int const scale = block_header >> 4;
803 int const right_shift = right_shifts [scale];
804 int const left_shift = left_shifts [scale];
805
806 /* previous samples */
807 int smp2 = voice->samples [brr_block_size + 1];
808 int smp1 = voice->samples [brr_block_size + 2];
809 voice->samples [0] = voice->samples [brr_block_size];
810
811 /* output position */
812 short* out = voice->samples + (1 + brr_block_size);
813 int offset = -brr_block_size << 2;
814
815 /* if next block has end flag set,
816 this block ends early (verified) */
817 if ( (block_header & 3) != 3 && (*addr & 3) == 1 )
818 {
819 /* arrange for last 9 samples to be skipped */
820 int const skip = 9;
821 out += (skip & 1);
822 voice->samples [skip] = voice->samples [brr_block_size];
823 voice->position += skip * 0x1000;
824 offset = (-brr_block_size + (skip & ~1)) << 2;
825 addr -= skip / 2;
826 /* force sample to end on next decode */
827 voice->block_header = 1;
828 }
829
830 do /* decode and filter 16 samples */
831 {
832 /* Get nybble, sign-extend, then scale
833 get byte, select which nybble, sign-extend, then shift
834 based on scaling. also handles invalid scaling values.*/
835 int delta = (int) (int8_t) (addr [offset >> 3] <<
836 (offset & 4)) >> right_shift << left_shift;
837
838 out [offset >> 2] = smp2;
839
840 if ( filter == 0 ) /* mode 0x08 (30-90% of the time) */
841 {
842 delta -= smp2 >> 1;
843 delta += smp2 >> 5;
844 smp2 = smp1;
845 delta += smp1;
846 delta += (-smp1 - (smp1 >> 1)) >> 5;
847 }
848 else
849 {
850 if ( filter == -4 ) /* mode 0x04 */
851 {
852 delta += smp1 >> 1;
853 delta += (-smp1) >> 5;
854 }
855 else if ( filter > -4 ) /* mode 0x0C */
856 {
857 delta -= smp2 >> 1;
858 delta += (smp2 + (smp2 >> 1)) >> 4;
859 delta += smp1;
860 delta += (-smp1 * 13) >> 7;
861 }
862 smp2 = smp1;
863 }
864
865 CLAMP16( delta, delta );
866 smp1 = (int16_t) (delta * 2); /* sign-extend */
867 }
868 while ( (offset += 4) != 0 );
869
870 out [0] = smp2;
871 out [1] = smp1;
872
873 skip_decode:;
874 }
875 #endif
876
877 /* Get rate (with possible modulation) */
Michael Sevakisd31162a2007-02-20 10:27:39 +0000878 int rate = VOICE_RATE(vr);
Adam Gashlinb73960d2007-02-14 03:34:55 +0000879 if ( this->r.g.pitch_mods & vbit )
880 rate = (rate * (prev_outx + 32768)) >> 15;
881
882 #if !SPC_NOINTERP
883 /* Interleved gauss table (to improve cache coherency). */
884 /* gauss [i * 2 + j] = normal_gauss [(1 - j) * 256 + i] */
Michael Sevakis9764e092007-03-03 03:28:13 +0000885 static short const gauss [512] ICONST_ATTR =
Adam Gashlinb73960d2007-02-14 03:34:55 +0000886 {
887370,1305, 366,1305, 362,1304, 358,1304, 354,1304, 351,1304, 347,1304, 343,1303,
888339,1303, 336,1303, 332,1302, 328,1302, 325,1301, 321,1300, 318,1300, 314,1299,
889311,1298, 307,1297, 304,1297, 300,1296, 297,1295, 293,1294, 290,1293, 286,1292,
890283,1291, 280,1290, 276,1288, 273,1287, 270,1286, 267,1284, 263,1283, 260,1282,
891257,1280, 254,1279, 251,1277, 248,1275, 245,1274, 242,1272, 239,1270, 236,1269,
892233,1267, 230,1265, 227,1263, 224,1261, 221,1259, 218,1257, 215,1255, 212,1253,
893210,1251, 207,1248, 204,1246, 201,1244, 199,1241, 196,1239, 193,1237, 191,1234,
894188,1232, 186,1229, 183,1227, 180,1224, 178,1221, 175,1219, 173,1216, 171,1213,
895168,1210, 166,1207, 163,1205, 161,1202, 159,1199, 156,1196, 154,1193, 152,1190,
896150,1186, 147,1183, 145,1180, 143,1177, 141,1174, 139,1170, 137,1167, 134,1164,
897132,1160, 130,1157, 128,1153, 126,1150, 124,1146, 122,1143, 120,1139, 118,1136,
898117,1132, 115,1128, 113,1125, 111,1121, 109,1117, 107,1113, 106,1109, 104,1106,
899102,1102, 100,1098, 99,1094, 97,1090, 95,1086, 94,1082, 92,1078, 90,1074,
900 89,1070, 87,1066, 86,1061, 84,1057, 83,1053, 81,1049, 80,1045, 78,1040,
901 77,1036, 76,1032, 74,1027, 73,1023, 71,1019, 70,1014, 69,1010, 67,1005,
902 66,1001, 65, 997, 64, 992, 62, 988, 61, 983, 60, 978, 59, 974, 58, 969,
903 56, 965, 55, 960, 54, 955, 53, 951, 52, 946, 51, 941, 50, 937, 49, 932,
904 48, 927, 47, 923, 46, 918, 45, 913, 44, 908, 43, 904, 42, 899, 41, 894,
905 40, 889, 39, 884, 38, 880, 37, 875, 36, 870, 36, 865, 35, 860, 34, 855,
906 33, 851, 32, 846, 32, 841, 31, 836, 30, 831, 29, 826, 29, 821, 28, 816,
907 27, 811, 27, 806, 26, 802, 25, 797, 24, 792, 24, 787, 23, 782, 23, 777,
908 22, 772, 21, 767, 21, 762, 20, 757, 20, 752, 19, 747, 19, 742, 18, 737,
909 17, 732, 17, 728, 16, 723, 16, 718, 15, 713, 15, 708, 15, 703, 14, 698,
910 14, 693, 13, 688, 13, 683, 12, 678, 12, 674, 11, 669, 11, 664, 11, 659,
911 10, 654, 10, 649, 10, 644, 9, 640, 9, 635, 9, 630, 8, 625, 8, 620,
912 8, 615, 7, 611, 7, 606, 7, 601, 6, 596, 6, 592, 6, 587, 6, 582,
913 5, 577, 5, 573, 5, 568, 5, 563, 4, 559, 4, 554, 4, 550, 4, 545,
914 4, 540, 3, 536, 3, 531, 3, 527, 3, 522, 3, 517, 2, 513, 2, 508,
915 2, 504, 2, 499, 2, 495, 2, 491, 2, 486, 1, 482, 1, 477, 1, 473,
916 1, 469, 1, 464, 1, 460, 1, 456, 1, 451, 1, 447, 1, 443, 1, 439,
917 0, 434, 0, 430, 0, 426, 0, 422, 0, 418, 0, 414, 0, 410, 0, 405,
918 0, 401, 0, 397, 0, 393, 0, 389, 0, 385, 0, 381, 0, 378, 0, 374,
919 };
Adam Gashlinb73960d2007-02-14 03:34:55 +0000920 /* Gaussian interpolation using most recent 4 samples */
921 long position = voice->position;
922 voice->position += rate;
923 short const* interp = voice->samples + (position >> 12);
924 int offset = position >> 4 & 0xFF;
925
926 /* Only left half of gaussian kernel is in table, so we must mirror
927 for right half */
928 short const* fwd = gauss + offset * 2;
929 short const* rev = gauss + 510 - offset * 2;
930
931 /* Use faster gaussian interpolation when exact result isn't needed
932 by pitch modulator of next channel */
933 int amp_0, amp_1;
934 if ( !(slow_gaussian & vbit) ) /* 99% of the time */
935 {
936 /* Main optimization is lack of clamping. Not a problem since
937 output never goes more than +/- 16 outside 16-bit range and
938 things are clamped later anyway. Other optimization is to
939 preserve fractional accuracy, eliminating several masks. */
940 int output = (((fwd [0] * interp [0] +
941 fwd [1] * interp [1] +
942 rev [1] * interp [2] +
943 rev [0] * interp [3] ) >> 11) * voice->envx) >> 11;
944
945 /* duplicated here to give compiler more to run in parallel */
946 amp_0 = voice->volume [0] * output;
947 amp_1 = voice->volume [1] * output;
948 raw_voice->outx = output >> 8;
949 }
950 else
951 {
952 int output = *(int16_t*) &this->noise;
953 if ( !(this->r.g.noise_enables & vbit) )
954 {
955 output = (fwd [0] * interp [0]) & ~0xFFF;
956 output = (output + fwd [1] * interp [1]) & ~0xFFF;
957 output = (output + rev [1] * interp [2]) >> 12;
958 output = (int16_t) (output * 2);
959 output += ((rev [0] * interp [3]) >> 12) * 2;
960 CLAMP16( output, output );
961 }
962 output = (output * voice->envx) >> 11 & ~1;
963
964 /* duplicated here to give compiler more to run in parallel */
965 amp_0 = voice->volume [0] * output;
966 amp_1 = voice->volume [1] * output;
967 prev_outx = output;
968 raw_voice->outx = (int8_t) (output >> 8);
969 }
970 #else
971 /* two-point linear interpolation */
972 #ifdef CPU_COLDFIRE
Michael Sevakis9764e092007-03-03 03:28:13 +0000973 int amp_0 = (int16_t)this->noise;
974 int amp_1;
Adam Gashlinb73960d2007-02-14 03:34:55 +0000975
976 if ( (this->r.g.noise_enables & vbit) == 0 )
977 {
978 uint32_t f = voice->position;
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +0000979 int32_t y0;
Michael Sevakisd31162a2007-02-20 10:27:39 +0000980
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +0000981 /**
982 * Formula (fastest found so far of MANY):
983 * output = y0 + f*y1 - f*y0
984 */
Michael Sevakisc2925812007-02-23 22:39:12 +0000985 asm volatile (
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +0000986 /* separate fractional and whole parts */
987 "move.l %[f], %[y1] \r\n"
988 "and.l #0xfff, %[f] \r\n"
989 "lsr.l %[sh], %[y1] \r\n"
990 /* load samples y0 (upper) & y1 (lower) */
991 "move.l 2(%[s], %[y1].l*2), %[y1] \r\n"
992 /* %acc0 = f*y1 */
993 "mac.w %[f]l, %[y1]l, %%acc0 \r\n"
994 /* msac.w is 2% boostier so add negative */
995 "neg.l %[f] \r\n"
996 /* %acc0 -= f*y0 */
997 "mac.w %[f]l, %[y1]u, %%acc0 \r\n"
998 /* separate out y0 and sign extend */
999 "swap %[y1] \r\n"
1000 "movea.w %[y1], %[y0] \r\n"
1001 /* fetch result, scale down and add y0 */
1002 "movclr.l %%acc0, %[y1] \r\n"
1003 /* output = y0 + (result >> 12) */
1004 "asr.l %[sh], %[y1] \r\n"
1005 "add.l %[y0], %[y1] \r\n"
Michael Sevakis9764e092007-03-03 03:28:13 +00001006 : [f]"+&d"(f), [y0]"=&a"(y0), [y1]"=&d"(amp_0)
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001007 : [s]"a"(voice->samples), [sh]"d"(12)
Adam Gashlinb73960d2007-02-14 03:34:55 +00001008 );
1009 }
1010
Michael Sevakis9764e092007-03-03 03:28:13 +00001011 /* apply voice envelope to output */
1012 asm volatile (
1013 "mac.w %[output]l, %[envx]l, %%acc0 \r\n"
1014 :
1015 : [output]"r"(amp_0), [envx]"r"(voice->envx)
1016 );
1017
1018 /* advance voice position */
Adam Gashlinb73960d2007-02-14 03:34:55 +00001019 voice->position += rate;
Michael Sevakis9764e092007-03-03 03:28:13 +00001020
1021 /* fetch output, scale and apply left and right
1022 voice volume */
1023 asm volatile (
1024 "movclr.l %%acc0, %[output] \r\n"
1025 "asr.l %[sh], %[output] \r\n"
1026 "mac.l %[vvol_0], %[output], %%acc0 \r\n"
1027 "mac.l %[vvol_1], %[output], %%acc1 \r\n"
1028 : [output]"=&r"(amp_0)
1029 : [vvol_0]"r"((int)voice->volume[0]),
1030 [vvol_1]"r"((int)voice->volume[1]),
1031 [sh]"d"(11)
1032 );
1033
1034 /* save this output into previous, scale and save in
1035 output register */
1036 prev_outx = amp_0;
1037 raw_voice->outx = amp_0 >> 8;
1038
1039 /* fetch final voice output */
1040 asm volatile (
1041 "movclr.l %%acc0, %[amp_0] \r\n"
1042 "movclr.l %%acc1, %[amp_1] \r\n"
1043 : [amp_0]"=r"(amp_0), [amp_1]"=r"(amp_1)
1044 );
Adam Gashlinb73960d2007-02-14 03:34:55 +00001045 #else
1046
1047 /* Try this one out on ARM and see - similar to above but the asm
1048 on coldfire removes a redundant register load worth 1 or 2%;
1049 switching to loading two samples at once may help too. That's
1050 done above and while 6 to 7% faster on cf over two 16 bit loads
1051 it makes it endian dependent.
1052
1053 measured small improvement (~1.5%) - hcs
1054 */
1055
1056 int output;
1057
1058 if ( (this->r.g.noise_enables & vbit) == 0 )
1059 {
1060 int const fraction = voice->position & 0xfff;
1061 short const* const pos = (voice->samples + (voice->position >> 12)) + 1;
1062 output = pos[0] + ((fraction * (pos[1] - pos[0])) >> 12);
1063 } else {
1064 output = *(int16_t *)&this->noise;
1065 }
1066
1067 voice->position += rate;
1068
1069 /* old version */
1070#if 0
1071 int fraction = voice->position & 0xFFF;
1072 short const* const pos = voice->samples + (voice->position >> 12);
1073 voice->position += rate;
1074 int output =
1075 (pos [2] * fraction + pos [1] * (0x1000 - fraction)) >> 12;
1076 /* no interpolation (hardly faster, and crappy sounding) */
1077 /*int output = pos [0];*/
1078 if ( this->r.g.noise_enables & vbit )
1079 output = *(int16_t*) &this->noise;
1080#endif
Adam Gashlinb73960d2007-02-14 03:34:55 +00001081 output = (output * voice->envx) >> 11;
1082
1083 /* duplicated here to give compiler more to run in parallel */
1084 int amp_0 = voice->volume [0] * output;
1085 int amp_1 = voice->volume [1] * output;
1086
1087 prev_outx = output;
1088 raw_voice->outx = (int8_t) (output >> 8);
Michael Sevakis9764e092007-03-03 03:28:13 +00001089 #endif /* CPU_COLDFIRE */
Adam Gashlinb73960d2007-02-14 03:34:55 +00001090 #endif
1091
1092 #if SPC_BRRCACHE
1093 if ( voice->position >= voice->wave_end )
1094 {
1095 long loop_len = voice->wave_loop << 12;
1096 voice->position -= loop_len;
1097 this->r.g.wave_ended |= vbit;
1098 if ( !loop_len )
1099 {
1100 this->keys_down ^= vbit;
1101 raw_voice->envx = 0;
1102 voice->envx = 0;
1103 }
1104 }
1105 #endif
1106#if 0
1107 EXIT_TIMER(dsp_gen);
1108
1109 ENTER_TIMER(dsp_mix);
1110#endif
1111 chans_0 += amp_0;
1112 chans_1 += amp_1;
1113 #if !SPC_NOECHO
1114 if ( this->r.g.echo_ons & vbit )
1115 {
1116 echo_0 += amp_0;
1117 echo_1 += amp_1;
1118 }
1119 #endif
1120#if 0
1121 EXIT_TIMER(dsp_mix);
1122#endif
1123 }
1124 /* end of voice loop */
1125
1126 #if !SPC_NOECHO
Michael Sevakisd31162a2007-02-20 10:27:39 +00001127 #ifdef CPU_COLDFIRE
1128 /* Read feedback from echo buffer */
1129 int echo_pos = this->echo_pos;
Michael Sevakis46bb37a2007-02-20 13:06:11 +00001130 uint8_t* const echo_ptr = RAM + ((echo_start + echo_pos) & 0xFFFF);
1131 echo_pos += 4;
1132 if ( echo_pos >= echo_wrap )
1133 echo_pos = 0;
Michael Sevakisd31162a2007-02-20 10:27:39 +00001134 this->echo_pos = echo_pos;
1135 int fb = swap_odd_even32(*(int32_t *)echo_ptr);
1136 int out_0, out_1;
1137
1138 /* Keep last 8 samples */
1139 *this->last_fir_ptr = fb;
1140 this->last_fir_ptr = this->fir_ptr;
1141
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001142 /* Apply echo FIR filter to output samples read from echo buffer -
1143 circular buffer is hardware incremented and masked; FIR
1144 coefficients and buffer history are loaded in parallel with
1145 multiply accumulate operations. Shift left by one here and once
1146 again when calculating feedback to have sample values justified
1147 to bit 31 in the output to ease endian swap, interleaving and
1148 clamping before placing result in the program's echo buffer. */
Michael Sevakisd31162a2007-02-20 10:27:39 +00001149 int _0, _1, _2;
Michael Sevakisc2925812007-02-23 22:39:12 +00001150 asm volatile (
Michael Sevakisd31162a2007-02-20 10:27:39 +00001151 "move.l (%[fir_c]) , %[_2] \r\n"
1152 "mac.w %[fb]u, %[_2]u, <<, (%[fir_p])+&, %[_0], %%acc0 \r\n"
1153 "mac.w %[fb]l, %[_2]u, <<, (%[fir_p])& , %[_1], %%acc1 \r\n"
1154 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1155 "mac.w %[_0]l, %[_2]l, <<, 4(%[fir_c]) , %[_2], %%acc1 \r\n"
1156 "mac.w %[_1]u, %[_2]u, <<, 4(%[fir_p])& , %[_0], %%acc0 \r\n"
1157 "mac.w %[_1]l, %[_2]u, <<, 8(%[fir_p])& , %[_1], %%acc1 \r\n"
1158 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1159 "mac.w %[_0]l, %[_2]l, <<, 8(%[fir_c]) , %[_2], %%acc1 \r\n"
1160 "mac.w %[_1]u, %[_2]u, <<, 12(%[fir_p])& , %[_0], %%acc0 \r\n"
1161 "mac.w %[_1]l, %[_2]u, <<, 16(%[fir_p])& , %[_1], %%acc1 \r\n"
1162 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1163 "mac.w %[_0]l, %[_2]l, <<, 12(%[fir_c]) , %[_2], %%acc1 \r\n"
1164 "mac.w %[_1]u, %[_2]u, <<, 20(%[fir_p])& , %[_0], %%acc0 \r\n"
1165 "mac.w %[_1]l, %[_2]u, << , %%acc1 \r\n"
1166 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1167 "mac.w %[_0]l, %[_2]l, << , %%acc1 \r\n"
Michael Sevakisd31162a2007-02-20 10:27:39 +00001168 : [_0]"=&r"(_0), [_1]"=&r"(_1), [_2]"=&r"(_2),
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001169 [fir_p]"+a"(this->fir_ptr)
Michael Sevakisd31162a2007-02-20 10:27:39 +00001170 : [fir_c]"a"(this->fir_coeff), [fb]"r"(fb)
1171 );
1172
1173 /* Generate output */
Michael Sevakisc2925812007-02-23 22:39:12 +00001174 asm volatile (
Michael Sevakisfa9ea272007-03-03 01:40:55 +00001175 /* fetch filter results _after_ gcc loads asm
1176 block parameters to eliminate emac stalls */
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001177 "movclr.l %%acc0, %[out_0] \r\n"
1178 "movclr.l %%acc1, %[out_1] \r\n"
1179 /* apply global volume */
Michael Sevakisd31162a2007-02-20 10:27:39 +00001180 "mac.l %[chans_0], %[gv_0] , %%acc2 \r\n"
1181 "mac.l %[chans_1], %[gv_1] , %%acc3 \r\n"
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001182 /* apply echo volume and add to final output */
Michael Sevakisd31162a2007-02-20 10:27:39 +00001183 "mac.l %[ev_0], %[out_0], >>, %%acc2 \r\n"
1184 "mac.l %[ev_1], %[out_1], >>, %%acc3 \r\n"
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001185 : [out_0]"=&r"(out_0), [out_1]"=&r"(out_1)
Michael Sevakisd31162a2007-02-20 10:27:39 +00001186 : [chans_0]"r"(chans_0), [gv_0]"r"(global_vol_0),
1187 [ev_0]"r"((int)this->r.g.echo_volume_0),
1188 [chans_1]"r"(chans_1), [gv_1]"r"(global_vol_1),
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001189 [ev_1]"r"((int)this->r.g.echo_volume_1)
Michael Sevakisd31162a2007-02-20 10:27:39 +00001190 );
1191
1192 /* Feedback into echo buffer */
1193 if ( !(this->r.g.flags & 0x20) )
1194 {
Michael Sevakisc2925812007-02-23 22:39:12 +00001195 asm volatile (
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001196 /* scale echo voices; saturate if overflow */
Michael Sevakisc2925812007-02-23 22:39:12 +00001197 "mac.l %[sh], %[e1] , %%acc1 \r\n"
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001198 "mac.l %[sh], %[e0] , %%acc0 \r\n"
1199 /* add scaled output from FIR filter */
Michael Sevakisd31162a2007-02-20 10:27:39 +00001200 "mac.l %[out_1], %[ef], <<, %%acc1 \r\n"
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001201 "mac.l %[out_0], %[ef], <<, %%acc0 \r\n"
1202 /* swap and fetch feedback results - simply
1203 swap_odd_even32 mixed in between macs and
1204 movclrs to mitigate stall issues */
1205 "move.l #0x00ff00ff, %[sh] \r\n"
Michael Sevakisd31162a2007-02-20 10:27:39 +00001206 "movclr.l %%acc1, %[e1] \r\n"
1207 "swap %[e1] \r\n"
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001208 "movclr.l %%acc0, %[e0] \r\n"
Michael Sevakisd31162a2007-02-20 10:27:39 +00001209 "move.w %[e1], %[e0] \r\n"
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001210 "and.l %[e0], %[sh] \r\n"
1211 "eor.l %[sh], %[e0] \r\n"
1212 "lsl.l #8, %[sh] \r\n"
1213 "lsr.l #8, %[e0] \r\n"
1214 "or.l %[sh], %[e0] \r\n"
1215 /* save final feedback into echo buffer */
1216 "move.l %[e0], (%[echo_ptr]) \r\n"
Michael Sevakisd31162a2007-02-20 10:27:39 +00001217 : [e0]"+&d"(echo_0), [e1]"+&d"(echo_1)
1218 : [out_0]"r"(out_0), [out_1]"r"(out_1),
1219 [ef]"r"((int)this->r.g.echo_feedback),
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001220 [echo_ptr]"a"((int32_t *)echo_ptr),
1221 [sh]"d"(1 << 9)
Michael Sevakisd31162a2007-02-20 10:27:39 +00001222 );
Michael Sevakisd31162a2007-02-20 10:27:39 +00001223 }
1224
1225 /* Output final samples */
Michael Sevakisc2925812007-02-23 22:39:12 +00001226 asm volatile (
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001227 /* fetch output saved in %acc2 and %acc3 */
Michael Sevakisd31162a2007-02-20 10:27:39 +00001228 "movclr.l %%acc2, %[out_0] \r\n"
1229 "movclr.l %%acc3, %[out_1] \r\n"
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001230 /* scale right by global_muting shift */
Michael Sevakisd31162a2007-02-20 10:27:39 +00001231 "asr.l %[gm], %[out_0] \r\n"
1232 "asr.l %[gm], %[out_1] \r\n"
1233 : [out_0]"=&d"(out_0), [out_1]"=&d"(out_1)
1234 : [gm]"d"(global_muting)
1235 );
1236
1237 out_buf [ 0] = out_0;
1238 out_buf [WAV_CHUNK_SIZE] = out_1;
1239 out_buf ++;
1240 #else /* !CPU_COLDFIRE */
Adam Gashlinb73960d2007-02-14 03:34:55 +00001241 /* Read feedback from echo buffer */
1242 int echo_pos = this->echo_pos;
1243 uint8_t* const echo_ptr = RAM +
1244 ((this->r.g.echo_page * 0x100 + echo_pos) & 0xFFFF);
1245 echo_pos += 4;
1246 if ( echo_pos >= (this->r.g.echo_delay & 15) * 0x800 )
1247 echo_pos = 0;
1248 this->echo_pos = echo_pos;
1249 int fb_0 = GET_LE16SA( echo_ptr );
1250 int fb_1 = GET_LE16SA( echo_ptr + 2 );
1251
1252 /* Keep last 8 samples */
1253 int (* const fir_ptr) [2] = this->fir_buf + this->fir_pos;
1254 this->fir_pos = (this->fir_pos + 1) & (fir_buf_half - 1);
1255 fir_ptr [ 0] [0] = fb_0;
1256 fir_ptr [ 0] [1] = fb_1;
1257 /* duplicate at +8 eliminates wrap checking below */
1258 fir_ptr [fir_buf_half] [0] = fb_0;
1259 fir_ptr [fir_buf_half] [1] = fb_1;
1260
1261 /* Apply FIR */
1262 fb_0 *= this->fir_coeff [0];
1263 fb_1 *= this->fir_coeff [0];
1264
1265 #define DO_PT( i )\
1266 fb_0 += fir_ptr [i] [0] * this->fir_coeff [i];\
1267 fb_1 += fir_ptr [i] [1] * this->fir_coeff [i];
1268
1269 DO_PT( 1 )
1270 DO_PT( 2 )
1271 DO_PT( 3 )
1272 DO_PT( 4 )
1273 DO_PT( 5 )
1274 DO_PT( 6 )
1275 DO_PT( 7 )
1276
1277 /* Generate output */
1278 int amp_0 = (chans_0 * global_vol_0 + fb_0 * this->r.g.echo_volume_0)
1279 >> global_muting;
1280 int amp_1 = (chans_1 * global_vol_1 + fb_1 * this->r.g.echo_volume_1)
1281 >> global_muting;
Michael Sevakisd31162a2007-02-20 10:27:39 +00001282 out_buf [ 0] = amp_0;
1283 out_buf [WAV_CHUNK_SIZE] = amp_1;
Adam Gashlinb73960d2007-02-14 03:34:55 +00001284 out_buf ++;
1285
1286 /* Feedback into echo buffer */
1287 int e0 = (echo_0 >> 7) + ((fb_0 * this->r.g.echo_feedback) >> 14);
1288 int e1 = (echo_1 >> 7) + ((fb_1 * this->r.g.echo_feedback) >> 14);
1289 if ( !(this->r.g.flags & 0x20) )
1290 {
1291 CLAMP16( e0, e0 );
1292 SET_LE16A( echo_ptr , e0 );
1293 CLAMP16( e1, e1 );
1294 SET_LE16A( echo_ptr + 2, e1 );
1295 }
Michael Sevakisd31162a2007-02-20 10:27:39 +00001296 #endif /* CPU_COLDFIRE */
Adam Gashlinb73960d2007-02-14 03:34:55 +00001297 #else
Michael Sevakisd31162a2007-02-20 10:27:39 +00001298 /* Generate output */
Adam Gashlinb73960d2007-02-14 03:34:55 +00001299 int amp_0 = (chans_0 * global_vol_0) >> global_muting;
1300 int amp_1 = (chans_1 * global_vol_1) >> global_muting;
Michael Sevakisd31162a2007-02-20 10:27:39 +00001301 out_buf [ 0] = amp_0;
1302 out_buf [WAV_CHUNK_SIZE] = amp_1;
Adam Gashlinb73960d2007-02-14 03:34:55 +00001303 out_buf ++;
1304 #endif
1305 }
1306 while ( --count );
1307#if 0
1308 EXIT_TIMER(dsp);
1309 ENTER_TIMER(cpu);
1310#endif
1311}
1312
1313static inline void DSP_run( struct Spc_Dsp* this, long count, int32_t* out )
1314{
1315 /* Should we just fill the buffer with silence? Flags won't be cleared */
1316 /* during this run so it seems it should keep resetting every sample. */
1317 if ( this->r.g.flags & 0x80 )
1318 DSP_reset( this );
1319
1320 DSP_run_( this, count, out );
1321}