blob: fdcd37f226a2d5217028f10fac3fe0dad25b27df [file] [log] [blame]
/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 *
 * Copyright (C) 2006-2007 Adam Gashlin (hcs)
 * Copyright (C) 2004-2007 Shay Green (blargg)
 * Copyright (C) 2002 Brad Martin
 *
 * All files in this archive are subject to the GNU General Public License.
 * See the file COPYING in the source tree root for full license agreement.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/

/* The DSP portion (awe!) */
22
/* Sizes shared by the whole DSP model: the number of voices and the size
   in bytes of the register file that overlays them. */
enum
{
    voice_count    = 8,
    register_count = 128
};
25
/* Register image of a single voice: sixteen consecutive bytes of the DSP
   register file. */
struct raw_voice_t
{
    int8_t  volume [2];  /* signed volume, [0] = left, [1] = right */
    uint8_t rate [2];    /* pitch, little-endian; low 14 bits are used */
    uint8_t waveform;    /* entry number in the source directory */
    uint8_t adsr [2];    /* envelope rates for attack, decay, and sustain */
    uint8_t gain;        /* envelope gain (if not using ADSR) */
    int8_t  envx;        /* current envelope level */
    int8_t  outx;        /* current sample */
    int8_t  unused [6];
};
37
/* Global (non per-voice) registers, laid out so that every named field
   lands on its register number when overlaid on the 128-byte register
   file. The unusedN arrays only pad over the bytes in between.
   "(-.7)" marks a signed value with 7 fractional bits. */
struct globals_t
{
    int8_t  unused1 [12];
    int8_t  volume_0;       /* 0C Main Volume Left (-.7) */
    int8_t  echo_feedback;  /* 0D Echo Feedback (-.7) */
    int8_t  unused2 [14];
    int8_t  volume_1;       /* 1C Main Volume Right (-.7) */
    int8_t  unused3 [15];
    int8_t  echo_volume_0;  /* 2C Echo Volume Left (-.7) */
    uint8_t pitch_mods;     /* 2D Pitch Modulation on/off for each voice */
    int8_t  unused4 [14];
    int8_t  echo_volume_1;  /* 3C Echo Volume Right (-.7) */
    uint8_t noise_enables;  /* 3D Noise output on/off for each voice */
    int8_t  unused5 [14];
    uint8_t key_ons;        /* 4C Key On for each voice */
    uint8_t echo_ons;       /* 4D Echo on/off for each voice */
    int8_t  unused6 [14];
    uint8_t key_offs;       /* 5C Key Off for each voice
                                  (instantiates release mode) */
    uint8_t wave_page;      /* 5D source directory (wave table offsets) */
    int8_t  unused7 [14];
    uint8_t flags;          /* 6C flags and noise freq */
    uint8_t echo_page;      /* 6D */
    int8_t  unused8 [14];
    uint8_t wave_ended;     /* 7C */
    uint8_t echo_delay;     /* 7D ms >> 4 */
    char    unused9 [2];
};
66
/* Envelope generator states.
   Using -1, 0, +1 for the three ADSR phases allows more efficient
   if statements (sign / zero tests). */
enum state_t
{
    state_decay   = -1,
    state_sustain =  0,
    state_attack  = +1,
    state_release =  2
};
73
/* Describes one waveform that has been pre-decoded into the BRR cache. */
struct cache_entry_t
{
    int16_t const* samples;  /* first decoded sample of the wave */
    unsigned end;            /* past-the-end position */
    unsigned loop;           /* number of samples in loop */
    unsigned start_addr;     /* address the wave was decoded from; used as
                                the lookup key */
};
81
/* number of samples produced by one BRR block */
enum { brr_block_size = 16 };
83
84struct voice_t
85{
86#if SPC_BRRCACHE
87 int16_t const* samples;
88 long wave_end;
89 int wave_loop;
90#else
91 int16_t samples [3 + brr_block_size + 1];
92 int block_header; /* header byte from current block */
93#endif
94 uint8_t const* addr;
95 short volume [2];
96 long position;/* position in samples buffer, with 12-bit fraction */
97 short envx;
98 short env_mode;
99 short env_timer;
100 short key_on_delay;
101};
102
#if SPC_BRRCACHE
/* Pre-decoded samples for every wave, with a little extra for samples
   that go past the end. */
static int16_t BRRcache [0x20000 + 32];
#endif

/* number of stereo sample pairs kept for the echo FIR filter */
enum { fir_buf_half = 8 };

#ifdef CPU_COLDFIRE
/* Global because of the large alignment requirement for hardware masking -
 * L-R interleaved 16-bit samples for easy loading and mac.w use.
 */
enum
{
    fir_buf_size = fir_buf_half * sizeof ( int32_t ), /* buffer size in bytes */
    fir_buf_mask = ~fir_buf_size                      /* value for %mask */
};
int32_t fir_buf[fir_buf_half]
    __attribute__ ((aligned (fir_buf_size*2))) IBSS_ATTR;
#endif /* CPU_COLDFIRE */
122
Adam Gashlinb73960d2007-02-14 03:34:55 +0000123struct Spc_Dsp
124{
125 union
126 {
127 struct raw_voice_t voice [voice_count];
128 uint8_t reg [register_count];
129 struct globals_t g;
130 int16_t align;
131 } r;
132
133 unsigned echo_pos;
134 int keys_down;
135 int noise_count;
136 uint16_t noise; /* also read as int16_t */
137
Michael Sevakisd31162a2007-02-20 10:27:39 +0000138#ifdef CPU_COLDFIRE
139 /* circularly hardware masked address */
140 int32_t *fir_ptr;
141 /* wrapped address just behind current position -
142 allows mac.w to increment and mask fir_ptr */
143 int32_t *last_fir_ptr;
144 /* copy of echo FIR constants as int16_t for use with mac.w */
145 int16_t fir_coeff[voice_count];
146#else
Adam Gashlinb73960d2007-02-14 03:34:55 +0000147 /* fir_buf [i + 8] == fir_buf [i], to avoid wrap checking in FIR code */
148 int fir_pos; /* (0 to 7) */
149 int fir_buf [fir_buf_half * 2] [2];
150 /* copy of echo FIR constants as int, for faster access */
151 int fir_coeff [voice_count];
Michael Sevakisd31162a2007-02-20 10:27:39 +0000152#endif
Adam Gashlinb73960d2007-02-14 03:34:55 +0000153
154 struct voice_t voice_state [voice_count];
155
156#if SPC_BRRCACHE
157 uint8_t oldsize;
158 struct cache_entry_t wave_entry [256];
159 struct cache_entry_t wave_entry_old [256];
160#endif
161};
162
/* One source directory entry as stored in emulated RAM: two 16-bit
   little-endian addresses held as raw bytes. */
struct src_dir
{
    char start [2];     /* address of the first BRR block */
    char loop [2];      /* address of the block to loop back to */
};
168
169static void DSP_reset( struct Spc_Dsp* this )
170{
171 this->keys_down = 0;
172 this->echo_pos = 0;
173 this->noise_count = 0;
174 this->noise = 2;
Adam Gashlinb73960d2007-02-14 03:34:55 +0000175
176 this->r.g.flags = 0xE0; /* reset, mute, echo off */
177 this->r.g.key_ons = 0;
178
179 memset( this->voice_state, 0, sizeof this->voice_state );
180
181 int i;
182 for ( i = voice_count; --i >= 0; )
183 {
184 struct voice_t* v = this->voice_state + i;
185 v->env_mode = state_release;
186 v->addr = ram.ram;
187 }
188
189 #if SPC_BRRCACHE
190 this->oldsize = 0;
191 for ( i = 0; i < 256; i++ )
192 this->wave_entry [i].start_addr = -1;
193 #endif
Michael Sevakisd31162a2007-02-20 10:27:39 +0000194
195#ifdef CPU_COLDFIRE
196 this->fir_ptr = fir_buf;
197 this->last_fir_ptr = &fir_buf [7];
198 memset( fir_buf, 0, sizeof fir_buf );
199#else
200 this->fir_pos = 0;
Adam Gashlinb73960d2007-02-14 03:34:55 +0000201 memset( this->fir_buf, 0, sizeof this->fir_buf );
Michael Sevakisd31162a2007-02-20 10:27:39 +0000202#endif
203
Adam Gashlinb73960d2007-02-14 03:34:55 +0000204 assert( offsetof (struct globals_t,unused9 [2]) == register_count );
205 assert( sizeof (this->r.voice) == register_count );
206}
207
208static void DSP_write( struct Spc_Dsp* this, int i, int data ) ICODE_ATTR;
209static void DSP_write( struct Spc_Dsp* this, int i, int data )
210{
211 assert( (unsigned) i < register_count );
212
213 this->r.reg [i] = data;
214 int high = i >> 4;
215 int low = i & 0x0F;
216 if ( low < 2 ) /* voice volumes */
217 {
218 int left = *(int8_t const*) &this->r.reg [i & ~1];
219 int right = *(int8_t const*) &this->r.reg [i | 1];
220 struct voice_t* v = this->voice_state + high;
221 v->volume [0] = left;
222 v->volume [1] = right;
223 }
224 else if ( low == 0x0F ) /* fir coefficients */
225 {
226 this->fir_coeff [7 - high] = (int8_t) data; /* sign-extend */
227 }
228}
229
230static inline int DSP_read( struct Spc_Dsp* this, int i )
231{
232 assert( (unsigned) i < register_count );
233 return this->r.reg [i];
234}
235
/* Saturate n to the signed 16-bit range and store the result in out:
     if ( n < -32768 ) out = -32768;
     if ( n >  32767 ) out =  32767;
   out is not written when n already fits in 16 bits.
   Relies on >> of a negative int being an arithmetic shift, so that
   (n >> 31) is 0 or -1. Both arguments are fully parenthesized so that
   expressions may be passed; n is evaluated more than once, so it must
   not have side effects. */
#define CLAMP16( n, out )\
{\
    if ( (int16_t) (n) != (n) )\
        (out) = 0x7FFF ^ ((n) >> 31);\
}
243
#if SPC_BRRCACHE
/* Make sure the wave selected by raw_voice->waveform, whose BRR data starts
   at RAM + start_addr, is available in decoded form and describe it in
   this->wave_entry [raw_voice->waveform].

   If an entry with the same start_addr is already recorded in
   wave_entry_old it is simply copied. Otherwise the BRR blocks are decoded
   into BRRcache (at index start_addr * 2) until a block with the end flag
   is processed, RAM + 0x10000 is reached, or a following block's header
   forces an early end; the result is then appended to wave_entry_old.
   While decoding, voice->addr is advanced past each consumed block.

   The decoder relies on >> of negative values being an arithmetic shift. */
static void decode_brr( struct Spc_Dsp* this, unsigned start_addr,
                        struct voice_t* voice,
                        struct raw_voice_t const* const raw_voice ) ICODE_ATTR;
static void decode_brr( struct Spc_Dsp* this, unsigned start_addr,
                        struct voice_t* voice,
                        struct raw_voice_t const* const raw_voice )
{
    /* setup same variables as where decode_brr() is called from */
    #undef RAM
    #define RAM ram.ram
    struct src_dir const* const sd =
        (struct src_dir*) &RAM [this->r.g.wave_page * 0x100];
    struct cache_entry_t* const wave_entry =
        &this->wave_entry [raw_voice->waveform];

    /* the following block can be put in place of the call to
       decode_brr() below
     */
    {
        DEBUGF( "decode at %08x (wave #%d)\n",
                start_addr, raw_voice->waveform );

        /* see if in cache */
        int i;
        for ( i = 0; i < this->oldsize; i++ )
        {
            struct cache_entry_t* e = &this->wave_entry_old [i];
            if ( e->start_addr == start_addr )
            {
                DEBUGF( "found in wave_entry_old (oldsize=%d)\n",
                        this->oldsize );
                *wave_entry = *e;
                goto wave_in_cache;
            }
        }

        wave_entry->start_addr = start_addr;

        uint8_t const* const loop_ptr =
            RAM + GET_LE16A( sd [raw_voice->waveform].loop );
        short* loop_start = 0;

        short* out = BRRcache + start_addr * 2;
        wave_entry->samples = out;
        *out++ = 0;
        int smp1 = 0;
        int smp2 = 0;

        uint8_t const* addr = RAM + start_addr;
        int block_header;
        do
        {
            if ( addr == loop_ptr )
            {
                loop_start = out;
                /* pointer difference is cast to match the %08x conversion */
                DEBUGF( "loop at %08x (wave #%d)\n",
                        (unsigned) (addr - RAM), raw_voice->waveform );
            }

            /* header */
            block_header = *addr;
            addr += 9;
            voice->addr = addr;
            int const filter = (block_header & 0x0C) - 0x08;

            /* scaling
               (invalid scaling gives -4096 for neg nybble, 0 for pos) */
            static unsigned char const right_shifts [16] = {
                5,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, 29, 29, 29,
            };
            static unsigned char const left_shifts [16] = {
                0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11, 11, 11
            };
            int const scale = block_header >> 4;
            int const right_shift = right_shifts [scale];
            int const left_shift  = left_shifts  [scale];

            /* output position; offset counts nybbles * 4 up to zero.
               Negate after shifting: left-shifting a negative value is
               undefined behavior. */
            out += brr_block_size;
            int offset = -(brr_block_size << 2);

            do /* decode and filter 16 samples */
            {
                /* Get nybble, sign-extend, then scale
                   get byte, select which nybble, sign-extend, then shift based
                   on scaling. also handles invalid scaling values. */
                int delta = (int) (int8_t) (addr [offset >> 3] << (offset & 4))
                        >> right_shift << left_shift;

                out [offset >> 2] = smp2;

                if ( filter == 0 ) /* mode 0x08 (30-90% of the time) */
                {
                    delta -= smp2 >> 1;
                    delta += smp2 >> 5;
                    smp2 = smp1;
                    delta += smp1;
                    delta += (-smp1 - (smp1 >> 1)) >> 5;
                }
                else
                {
                    if ( filter == -4 ) /* mode 0x04 */
                    {
                        delta += smp1 >> 1;
                        delta += (-smp1) >> 5;
                    }
                    else if ( filter > -4 ) /* mode 0x0C */
                    {
                        delta -= smp2 >> 1;
                        delta += (smp2 + (smp2 >> 1)) >> 4;
                        delta += smp1;
                        delta += (-smp1 * 13) >> 7;
                    }
                    smp2 = smp1;
                }

                CLAMP16( delta, delta );
                smp1 = (int16_t) (delta * 2); /* sign-extend */
            }
            while ( (offset += 4) != 0 );

            /* if next block has end flag set, this block ends early */
            /* (verified) */
            if ( (block_header & 3) != 3 && (*addr & 3) == 1 )
            {
                /* skip last 9 samples */
                out -= 9;
                goto early_end;
            }
        }
        while ( !(block_header & 1) && addr < RAM + 0x10000 );

        out [0] = smp2;
        out [1] = smp1;

    early_end:
        wave_entry->end = (out - 1 - wave_entry->samples) << 12;

        wave_entry->loop = 0;
        if ( (block_header & 2) )
        {
            if ( loop_start )
            {
                int loop = out - loop_start;
                wave_entry->loop = loop;
                wave_entry->end += 0x3000;
                /* repeat the first loop samples after the end so reads
                   just past the end see the start of the loop */
                out [2] = loop_start [2];
                out [3] = loop_start [3];
                out [4] = loop_start [4];
            }
            else
            {
                DEBUGF( "loop point outside initial wave\n" );
            }
        }

        /* pointer difference is cast to match the %08x conversion */
        DEBUGF( "end at %08x (wave #%d)\n",
                (unsigned) (addr - RAM), raw_voice->waveform );

        /* add to cache */
        this->wave_entry_old [this->oldsize++] = *wave_entry;
wave_in_cache:;
    }
}
#endif
408
409static void key_on(struct Spc_Dsp* const this, struct voice_t* const voice,
410 struct src_dir const* const sd,
411 struct raw_voice_t const* const raw_voice,
412 const int key_on_delay, const int vbit) ICODE_ATTR;
413static void key_on(struct Spc_Dsp* const this, struct voice_t* const voice,
414 struct src_dir const* const sd,
415 struct raw_voice_t const* const raw_voice,
416 const int key_on_delay, const int vbit) {
417 #undef RAM
418 #define RAM ram.ram
419 int const env_rate_init = 0x7800;
420 voice->key_on_delay = key_on_delay;
421 if ( key_on_delay == 0 )
422 {
423 this->keys_down |= vbit;
424 voice->envx = 0;
425 voice->env_mode = state_attack;
426 voice->env_timer = env_rate_init; /* TODO: inaccurate? */
Michael Sevakisd31162a2007-02-20 10:27:39 +0000427 unsigned start_addr = GET_LE16A(sd [raw_voice->waveform].start);
Adam Gashlinb73960d2007-02-14 03:34:55 +0000428 #if !SPC_BRRCACHE
429 {
430 voice->addr = RAM + start_addr;
431 /* BRR filter uses previous samples */
432 voice->samples [brr_block_size + 1] = 0;
433 voice->samples [brr_block_size + 2] = 0;
434 /* decode three samples immediately */
435 voice->position = (brr_block_size + 3) * 0x1000 - 1;
436 voice->block_header = 0; /* "previous" BRR header */
437 }
438 #else
439 {
440 voice->position = 3 * 0x1000 - 1;
441 struct cache_entry_t* const wave_entry =
442 &this->wave_entry [raw_voice->waveform];
443
444 /* predecode BRR if not already */
445 if ( wave_entry->start_addr != start_addr )
446 {
447 /* the following line can be replaced by the indicated block
448 in decode_brr() */
449 decode_brr( this, start_addr, voice, raw_voice );
450 }
451
452 voice->samples = wave_entry->samples;
453 voice->wave_end = wave_entry->end;
454 voice->wave_loop = wave_entry->loop;
455 }
456 #endif
457 }
458}
459
460static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
461 ICODE_ATTR;
462static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
463{
464 #undef RAM
465#ifdef CPU_ARM
466 uint8_t* const ram_ = ram.ram;
467 #define RAM ram_
468#else
469 #define RAM ram.ram
470#endif
471#if 0
472 EXIT_TIMER(cpu);
473 ENTER_TIMER(dsp);
474#endif
Michael Sevakisd31162a2007-02-20 10:27:39 +0000475
Adam Gashlinb73960d2007-02-14 03:34:55 +0000476 /* Here we check for keys on/off. Docs say that successive writes
477 to KON/KOF must be separated by at least 2 Ts periods or risk
478 being neglected. Therefore DSP only looks at these during an
479 update, and not at the time of the write. Only need to do this
480 once however, since the regs haven't changed over the whole
481 period we need to catch up with. */
482
483 {
484 int key_ons = this->r.g.key_ons;
485 int key_offs = this->r.g.key_offs;
486 /* keying on a voice resets that bit in ENDX */
487 this->r.g.wave_ended &= ~key_ons;
488 /* key_off bits prevent key_on from being acknowledged */
489 this->r.g.key_ons = key_ons & key_offs;
490
491 /* process key events outside loop, since they won't re-occur */
492 struct voice_t* voice = this->voice_state + 8;
493 int vbit = 0x80;
494 do
495 {
496 --voice;
497 if ( key_offs & vbit )
498 {
499 voice->env_mode = state_release;
500 voice->key_on_delay = 0;
501 }
502 else if ( key_ons & vbit )
503 {
504 voice->key_on_delay = 8;
505 }
506 }
507 while ( (vbit >>= 1) != 0 );
508 }
509
510 struct src_dir const* const sd =
511 (struct src_dir*) &RAM [this->r.g.wave_page * 0x100];
Michael Sevakisd31162a2007-02-20 10:27:39 +0000512
513 #ifdef ROCKBOX_BIG_ENDIAN
514 /* Convert endiannesses before entering loops - these
515 get used alot */
516 const uint32_t rates[voice_count] =
517 {
518 GET_LE16A( this->r.voice[0].rate ) & 0x3FFF,
519 GET_LE16A( this->r.voice[1].rate ) & 0x3FFF,
520 GET_LE16A( this->r.voice[2].rate ) & 0x3FFF,
521 GET_LE16A( this->r.voice[3].rate ) & 0x3FFF,
522 GET_LE16A( this->r.voice[4].rate ) & 0x3FFF,
523 GET_LE16A( this->r.voice[5].rate ) & 0x3FFF,
524 GET_LE16A( this->r.voice[6].rate ) & 0x3FFF,
525 GET_LE16A( this->r.voice[7].rate ) & 0x3FFF,
526 };
527 #define VOICE_RATE(x) *(x)
528 #define IF_RBE(...) __VA_ARGS__
529 #ifdef CPU_COLDFIRE
530 /* Initialize mask register with the buffer address mask */
Michael Sevakisc2925812007-02-23 22:39:12 +0000531 asm volatile ("move.l %[m], %%mask" : : [m]"i"(fir_buf_mask));
Michael Sevakis46bb37a2007-02-20 13:06:11 +0000532 const int echo_wrap = (this->r.g.echo_delay & 15) * 0x800;
533 const int echo_start = this->r.g.echo_page * 0x100;
Michael Sevakisd31162a2007-02-20 10:27:39 +0000534 #endif /* CPU_COLDFIRE */
535 #else
536 #define VOICE_RATE(x) (INT16A(raw_voice->rate) & 0x3FFF)
537 #define IF_RBE(...)
538 #endif /* ROCKBOX_BIG_ENDIAN */
Adam Gashlinb73960d2007-02-14 03:34:55 +0000539
540#if !SPC_NOINTERP
541 int const slow_gaussian = (this->r.g.pitch_mods >> 1) |
542 this->r.g.noise_enables;
543#endif
544 /* (g.flags & 0x40) ? 30 : 14 */
Michael Sevakisd31162a2007-02-20 10:27:39 +0000545 int const global_muting = ((this->r.g.flags & 0x40) >> 2) + 14 - 8;
546 int const global_vol_0 = this->r.g.volume_0;
547 int const global_vol_1 = this->r.g.volume_1;
Adam Gashlinb73960d2007-02-14 03:34:55 +0000548
549 /* each rate divides exactly into 0x7800 without remainder */
550 int const env_rate_init = 0x7800;
551 static unsigned short const env_rates [0x20] ICONST_ATTR =
552 {
553 0x0000, 0x000F, 0x0014, 0x0018, 0x001E, 0x0028, 0x0030, 0x003C,
554 0x0050, 0x0060, 0x0078, 0x00A0, 0x00C0, 0x00F0, 0x0140, 0x0180,
555 0x01E0, 0x0280, 0x0300, 0x03C0, 0x0500, 0x0600, 0x0780, 0x0A00,
556 0x0C00, 0x0F00, 0x1400, 0x1800, 0x1E00, 0x2800, 0x3C00, 0x7800
557 };
558
559 do /* one pair of output samples per iteration */
560 {
561 /* Noise */
562 if ( this->r.g.noise_enables )
563 {
564 if ( (this->noise_count -=
565 env_rates [this->r.g.flags & 0x1F]) <= 0 )
566 {
567 this->noise_count = env_rate_init;
568 int feedback = (this->noise << 13) ^ (this->noise << 14);
569 this->noise = (feedback & 0x8000) ^ (this->noise >> 1 & ~1);
570 }
571 }
572
573#if !SPC_NOECHO
574 int echo_0 = 0;
575 int echo_1 = 0;
576#endif
577 long prev_outx = 0; /* TODO: correct value for first channel? */
578 int chans_0 = 0;
579 int chans_1 = 0;
580 /* TODO: put raw_voice pointer in voice_t? */
581 struct raw_voice_t * raw_voice = this->r.voice;
582 struct voice_t* voice = this->voice_state;
583 int vbit = 1;
Michael Sevakisd31162a2007-02-20 10:27:39 +0000584 IF_RBE( const uint32_t* vr = rates; )
585 for ( ; vbit < 0x100; vbit <<= 1, ++voice, ++raw_voice IF_RBE( , ++vr ) )
Adam Gashlinb73960d2007-02-14 03:34:55 +0000586 {
587 /* pregen involves checking keyon, etc */
588#if 0
589 ENTER_TIMER(dsp_pregen);
590#endif
591
592 /* Key on events are delayed */
593 int key_on_delay = voice->key_on_delay;
594
595 if ( --key_on_delay >= 0 ) /* <1% of the time */
596 {
597 key_on(this,voice,sd,raw_voice,key_on_delay,vbit);
598 }
599
600 if ( !(this->keys_down & vbit) ) /* Silent channel */
601 {
602 silent_chan:
603 raw_voice->envx = 0;
604 raw_voice->outx = 0;
605 prev_outx = 0;
606 continue;
607 }
608
609 /* Envelope */
610 {
611 int const env_range = 0x800;
612 int env_mode = voice->env_mode;
613 int adsr0 = raw_voice->adsr [0];
614 int env_timer;
615 if ( env_mode != state_release ) /* 99% of the time */
616 {
617 env_timer = voice->env_timer;
618 if ( adsr0 & 0x80 ) /* 79% of the time */
619 {
620 int adsr1 = raw_voice->adsr [1];
621 if ( env_mode == state_sustain ) /* 74% of the time */
622 {
623 if ( (env_timer -= env_rates [adsr1 & 0x1F]) > 0 )
624 goto write_env_timer;
625
626 int envx = voice->envx;
627 envx--; /* envx *= 255 / 256 */
628 envx -= envx >> 8;
629 voice->envx = envx;
630 /* TODO: should this be 8? */
631 raw_voice->envx = envx >> 4;
632 goto init_env_timer;
633 }
634 else if ( env_mode < 0 ) /* 25% state_decay */
635 {
636 int envx = voice->envx;
637 if ( (env_timer -=
638 env_rates [(adsr0 >> 3 & 0x0E) + 0x10]) <= 0 )
639 {
640 envx--; /* envx *= 255 / 256 */
641 envx -= envx >> 8;
642 voice->envx = envx;
643 /* TODO: should this be 8? */
644 raw_voice->envx = envx >> 4;
645 env_timer = env_rate_init;
646 }
647
648 int sustain_level = adsr1 >> 5;
649 if ( envx <= (sustain_level + 1) * 0x100 )
650 voice->env_mode = state_sustain;
651
652 goto write_env_timer;
653 }
654 else /* state_attack */
655 {
656 int t = adsr0 & 0x0F;
657 if ( (env_timer -= env_rates [t * 2 + 1]) > 0 )
658 goto write_env_timer;
659
660 int envx = voice->envx;
661
662 int const step = env_range / 64;
663 envx += step;
664 if ( t == 15 )
665 envx += env_range / 2 - step;
666
667 if ( envx >= env_range )
668 {
669 envx = env_range - 1;
670 voice->env_mode = state_decay;
671 }
672 voice->envx = envx;
673 /* TODO: should this be 8? */
674 raw_voice->envx = envx >> 4;
675 goto init_env_timer;
676 }
677 }
678 else /* gain mode */
679 {
680 int t = raw_voice->gain;
681 if ( t < 0x80 )
682 {
683 raw_voice->envx = t;
684 voice->envx = t << 4;
685 goto env_end;
686 }
687 else
688 {
689 if ( (env_timer -= env_rates [t & 0x1F]) > 0 )
690 goto write_env_timer;
691
692 int envx = voice->envx;
693 int mode = t >> 5;
694 if ( mode <= 5 ) /* decay */
695 {
696 int step = env_range / 64;
697 if ( mode == 5 ) /* exponential */
698 {
699 envx--; /* envx *= 255 / 256 */
700 step = envx >> 8;
701 }
702 if ( (envx -= step) < 0 )
703 {
704 envx = 0;
705 if ( voice->env_mode == state_attack )
706 voice->env_mode = state_decay;
707 }
708 }
709 else /* attack */
710 {
711 int const step = env_range / 64;
712 envx += step;
713 if ( mode == 7 &&
714 envx >= env_range * 3 / 4 + step )
715 envx += env_range / 256 - step;
716
717 if ( envx >= env_range )
718 envx = env_range - 1;
719 }
720 voice->envx = envx;
721 /* TODO: should this be 8? */
722 raw_voice->envx = envx >> 4;
723 goto init_env_timer;
724 }
725 }
726 }
727 else /* state_release */
728 {
729 int envx = voice->envx;
730 if ( (envx -= env_range / 256) > 0 )
731 {
732 voice->envx = envx;
733 raw_voice->envx = envx >> 8;
734 goto env_end;
735 }
736 else
737 {
738 /* bit was set, so this clears it */
739 this->keys_down ^= vbit;
740 voice->envx = 0;
741 goto silent_chan;
742 }
743 }
744 init_env_timer:
745 env_timer = env_rate_init;
746 write_env_timer:
747 voice->env_timer = env_timer;
748 env_end:;
749 }
750#if 0
751 EXIT_TIMER(dsp_pregen);
752
753 ENTER_TIMER(dsp_gen);
754#endif
755 #if !SPC_BRRCACHE
756 /* Decode BRR block */
757 if ( voice->position >= brr_block_size * 0x1000 )
758 {
759 voice->position -= brr_block_size * 0x1000;
760
761 uint8_t const* addr = voice->addr;
762 if ( addr >= RAM + 0x10000 )
763 addr -= 0x10000;
764
765 /* action based on previous block's header */
766 if ( voice->block_header & 1 )
767 {
768 addr = RAM + GET_LE16A( sd [raw_voice->waveform].loop );
769 this->r.g.wave_ended |= vbit;
770 if ( !(voice->block_header & 2) ) /* 1% of the time */
771 {
772 /* first block was end block;
773 don't play anything (verified) */
774 /* bit was set, so this clears it */
775 this->keys_down ^= vbit;
776
777 /* since voice->envx is 0,
778 samples and position don't matter */
779 raw_voice->envx = 0;
780 voice->envx = 0;
781 goto skip_decode;
782 }
783 }
784
785 /* header */
786 int const block_header = *addr;
787 addr += 9;
788 voice->addr = addr;
789 voice->block_header = block_header;
790 int const filter = (block_header & 0x0C) - 0x08;
791
792 /* scaling (invalid scaling gives -4096 for neg nybble,
793 0 for pos) */
794 static unsigned char const right_shifts [16] = {
795 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 29, 29, 29,
796 };
797 static unsigned char const left_shifts [16] = {
798 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11
799 };
800 int const scale = block_header >> 4;
801 int const right_shift = right_shifts [scale];
802 int const left_shift = left_shifts [scale];
803
804 /* previous samples */
805 int smp2 = voice->samples [brr_block_size + 1];
806 int smp1 = voice->samples [brr_block_size + 2];
807 voice->samples [0] = voice->samples [brr_block_size];
808
809 /* output position */
810 short* out = voice->samples + (1 + brr_block_size);
811 int offset = -brr_block_size << 2;
812
813 /* if next block has end flag set,
814 this block ends early (verified) */
815 if ( (block_header & 3) != 3 && (*addr & 3) == 1 )
816 {
817 /* arrange for last 9 samples to be skipped */
818 int const skip = 9;
819 out += (skip & 1);
820 voice->samples [skip] = voice->samples [brr_block_size];
821 voice->position += skip * 0x1000;
822 offset = (-brr_block_size + (skip & ~1)) << 2;
823 addr -= skip / 2;
824 /* force sample to end on next decode */
825 voice->block_header = 1;
826 }
827
828 do /* decode and filter 16 samples */
829 {
830 /* Get nybble, sign-extend, then scale
831 get byte, select which nybble, sign-extend, then shift
832 based on scaling. also handles invalid scaling values.*/
833 int delta = (int) (int8_t) (addr [offset >> 3] <<
834 (offset & 4)) >> right_shift << left_shift;
835
836 out [offset >> 2] = smp2;
837
838 if ( filter == 0 ) /* mode 0x08 (30-90% of the time) */
839 {
840 delta -= smp2 >> 1;
841 delta += smp2 >> 5;
842 smp2 = smp1;
843 delta += smp1;
844 delta += (-smp1 - (smp1 >> 1)) >> 5;
845 }
846 else
847 {
848 if ( filter == -4 ) /* mode 0x04 */
849 {
850 delta += smp1 >> 1;
851 delta += (-smp1) >> 5;
852 }
853 else if ( filter > -4 ) /* mode 0x0C */
854 {
855 delta -= smp2 >> 1;
856 delta += (smp2 + (smp2 >> 1)) >> 4;
857 delta += smp1;
858 delta += (-smp1 * 13) >> 7;
859 }
860 smp2 = smp1;
861 }
862
863 CLAMP16( delta, delta );
864 smp1 = (int16_t) (delta * 2); /* sign-extend */
865 }
866 while ( (offset += 4) != 0 );
867
868 out [0] = smp2;
869 out [1] = smp1;
870
871 skip_decode:;
872 }
873 #endif
874
875 /* Get rate (with possible modulation) */
Michael Sevakisd31162a2007-02-20 10:27:39 +0000876 int rate = VOICE_RATE(vr);
Adam Gashlinb73960d2007-02-14 03:34:55 +0000877 if ( this->r.g.pitch_mods & vbit )
878 rate = (rate * (prev_outx + 32768)) >> 15;
879
880 #if !SPC_NOINTERP
881 /* Interleved gauss table (to improve cache coherency). */
882 /* gauss [i * 2 + j] = normal_gauss [(1 - j) * 256 + i] */
883 static short const gauss [512] =
884 {
885370,1305, 366,1305, 362,1304, 358,1304, 354,1304, 351,1304, 347,1304, 343,1303,
886339,1303, 336,1303, 332,1302, 328,1302, 325,1301, 321,1300, 318,1300, 314,1299,
887311,1298, 307,1297, 304,1297, 300,1296, 297,1295, 293,1294, 290,1293, 286,1292,
888283,1291, 280,1290, 276,1288, 273,1287, 270,1286, 267,1284, 263,1283, 260,1282,
889257,1280, 254,1279, 251,1277, 248,1275, 245,1274, 242,1272, 239,1270, 236,1269,
890233,1267, 230,1265, 227,1263, 224,1261, 221,1259, 218,1257, 215,1255, 212,1253,
891210,1251, 207,1248, 204,1246, 201,1244, 199,1241, 196,1239, 193,1237, 191,1234,
892188,1232, 186,1229, 183,1227, 180,1224, 178,1221, 175,1219, 173,1216, 171,1213,
893168,1210, 166,1207, 163,1205, 161,1202, 159,1199, 156,1196, 154,1193, 152,1190,
894150,1186, 147,1183, 145,1180, 143,1177, 141,1174, 139,1170, 137,1167, 134,1164,
895132,1160, 130,1157, 128,1153, 126,1150, 124,1146, 122,1143, 120,1139, 118,1136,
896117,1132, 115,1128, 113,1125, 111,1121, 109,1117, 107,1113, 106,1109, 104,1106,
897102,1102, 100,1098, 99,1094, 97,1090, 95,1086, 94,1082, 92,1078, 90,1074,
898 89,1070, 87,1066, 86,1061, 84,1057, 83,1053, 81,1049, 80,1045, 78,1040,
899 77,1036, 76,1032, 74,1027, 73,1023, 71,1019, 70,1014, 69,1010, 67,1005,
900 66,1001, 65, 997, 64, 992, 62, 988, 61, 983, 60, 978, 59, 974, 58, 969,
901 56, 965, 55, 960, 54, 955, 53, 951, 52, 946, 51, 941, 50, 937, 49, 932,
902 48, 927, 47, 923, 46, 918, 45, 913, 44, 908, 43, 904, 42, 899, 41, 894,
903 40, 889, 39, 884, 38, 880, 37, 875, 36, 870, 36, 865, 35, 860, 34, 855,
904 33, 851, 32, 846, 32, 841, 31, 836, 30, 831, 29, 826, 29, 821, 28, 816,
905 27, 811, 27, 806, 26, 802, 25, 797, 24, 792, 24, 787, 23, 782, 23, 777,
906 22, 772, 21, 767, 21, 762, 20, 757, 20, 752, 19, 747, 19, 742, 18, 737,
907 17, 732, 17, 728, 16, 723, 16, 718, 15, 713, 15, 708, 15, 703, 14, 698,
908 14, 693, 13, 688, 13, 683, 12, 678, 12, 674, 11, 669, 11, 664, 11, 659,
909 10, 654, 10, 649, 10, 644, 9, 640, 9, 635, 9, 630, 8, 625, 8, 620,
910 8, 615, 7, 611, 7, 606, 7, 601, 6, 596, 6, 592, 6, 587, 6, 582,
911 5, 577, 5, 573, 5, 568, 5, 563, 4, 559, 4, 554, 4, 550, 4, 545,
912 4, 540, 3, 536, 3, 531, 3, 527, 3, 522, 3, 517, 2, 513, 2, 508,
913 2, 504, 2, 499, 2, 495, 2, 491, 2, 486, 1, 482, 1, 477, 1, 473,
914 1, 469, 1, 464, 1, 460, 1, 456, 1, 451, 1, 447, 1, 443, 1, 439,
915 0, 434, 0, 430, 0, 426, 0, 422, 0, 418, 0, 414, 0, 410, 0, 405,
916 0, 401, 0, 397, 0, 393, 0, 389, 0, 385, 0, 381, 0, 378, 0, 374,
917 };
918
919 /* Gaussian interpolation using most recent 4 samples */
920 long position = voice->position;
921 voice->position += rate;
922 short const* interp = voice->samples + (position >> 12);
923 int offset = position >> 4 & 0xFF;
924
925 /* Only left half of gaussian kernel is in table, so we must mirror
926 for right half */
927 short const* fwd = gauss + offset * 2;
928 short const* rev = gauss + 510 - offset * 2;
929
930 /* Use faster gaussian interpolation when exact result isn't needed
931 by pitch modulator of next channel */
932 int amp_0, amp_1;
933 if ( !(slow_gaussian & vbit) ) /* 99% of the time */
934 {
935 /* Main optimization is lack of clamping. Not a problem since
936 output never goes more than +/- 16 outside 16-bit range and
937 things are clamped later anyway. Other optimization is to
938 preserve fractional accuracy, eliminating several masks. */
939 int output = (((fwd [0] * interp [0] +
940 fwd [1] * interp [1] +
941 rev [1] * interp [2] +
942 rev [0] * interp [3] ) >> 11) * voice->envx) >> 11;
943
944 /* duplicated here to give compiler more to run in parallel */
945 amp_0 = voice->volume [0] * output;
946 amp_1 = voice->volume [1] * output;
947 raw_voice->outx = output >> 8;
948 }
949 else
950 {
951 int output = *(int16_t*) &this->noise;
952 if ( !(this->r.g.noise_enables & vbit) )
953 {
954 output = (fwd [0] * interp [0]) & ~0xFFF;
955 output = (output + fwd [1] * interp [1]) & ~0xFFF;
956 output = (output + rev [1] * interp [2]) >> 12;
957 output = (int16_t) (output * 2);
958 output += ((rev [0] * interp [3]) >> 12) * 2;
959 CLAMP16( output, output );
960 }
961 output = (output * voice->envx) >> 11 & ~1;
962
963 /* duplicated here to give compiler more to run in parallel */
964 amp_0 = voice->volume [0] * output;
965 amp_1 = voice->volume [1] * output;
966 prev_outx = output;
967 raw_voice->outx = (int8_t) (output >> 8);
968 }
969 #else
970 /* two-point linear interpolation */
971 #ifdef CPU_COLDFIRE
972 int32_t output = (int16_t)this->noise;
973
974 if ( (this->r.g.noise_enables & vbit) == 0 )
975 {
976 uint32_t f = voice->position;
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +0000977 int32_t y0;
Michael Sevakisd31162a2007-02-20 10:27:39 +0000978
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +0000979 /**
980 * Formula (fastest found so far of MANY):
981 * output = y0 + f*y1 - f*y0
982 */
Michael Sevakisc2925812007-02-23 22:39:12 +0000983 asm volatile (
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +0000984 /* separate fractional and whole parts */
985 "move.l %[f], %[y1] \r\n"
986 "and.l #0xfff, %[f] \r\n"
987 "lsr.l %[sh], %[y1] \r\n"
988 /* load samples y0 (upper) & y1 (lower) */
989 "move.l 2(%[s], %[y1].l*2), %[y1] \r\n"
990 /* %acc0 = f*y1 */
991 "mac.w %[f]l, %[y1]l, %%acc0 \r\n"
992 /* msac.w is 2% boostier so add negative */
993 "neg.l %[f] \r\n"
994 /* %acc0 -= f*y0 */
995 "mac.w %[f]l, %[y1]u, %%acc0 \r\n"
996 /* separate out y0 and sign extend */
997 "swap %[y1] \r\n"
998 "movea.w %[y1], %[y0] \r\n"
999 /* fetch result, scale down and add y0 */
1000 "movclr.l %%acc0, %[y1] \r\n"
1001 /* output = y0 + (result >> 12) */
1002 "asr.l %[sh], %[y1] \r\n"
1003 "add.l %[y0], %[y1] \r\n"
1004 : [f]"+&d"(f), [y0]"=&a"(y0), [y1]"=&d"(output)
1005 : [s]"a"(voice->samples), [sh]"d"(12)
Adam Gashlinb73960d2007-02-14 03:34:55 +00001006 );
1007 }
1008
1009 voice->position += rate;
1010 #else
1011
1012 /* Try this one out on ARM and see - similar to above but the asm
1013 on coldfire removes a redundant register load worth 1 or 2%;
1014 switching to loading two samples at once may help too. That's
1015 done above and while 6 to 7% faster on cf over two 16 bit loads
1016 it makes it endian dependant.
1017
1018 measured small improvement (~1.5%) - hcs
1019 */
1020
1021 int output;
1022
1023 if ( (this->r.g.noise_enables & vbit) == 0 )
1024 {
1025 int const fraction = voice->position & 0xfff;
1026 short const* const pos = (voice->samples + (voice->position >> 12)) + 1;
1027 output = pos[0] + ((fraction * (pos[1] - pos[0])) >> 12);
1028 } else {
1029 output = *(int16_t *)&this->noise;
1030 }
1031
1032 voice->position += rate;
1033
1034 /* old version */
1035#if 0
1036 int fraction = voice->position & 0xFFF;
1037 short const* const pos = voice->samples + (voice->position >> 12);
1038 voice->position += rate;
1039 int output =
1040 (pos [2] * fraction + pos [1] * (0x1000 - fraction)) >> 12;
1041 /* no interpolation (hardly faster, and crappy sounding) */
1042 /*int output = pos [0];*/
1043 if ( this->r.g.noise_enables & vbit )
1044 output = *(int16_t*) &this->noise;
1045#endif
1046 #endif /* CPU_COLDFIRE */
1047
1048 output = (output * voice->envx) >> 11;
1049
1050 /* duplicated here to give compiler more to run in parallel */
1051 int amp_0 = voice->volume [0] * output;
1052 int amp_1 = voice->volume [1] * output;
1053
1054 prev_outx = output;
1055 raw_voice->outx = (int8_t) (output >> 8);
1056 #endif
1057
1058 #if SPC_BRRCACHE
1059 if ( voice->position >= voice->wave_end )
1060 {
1061 long loop_len = voice->wave_loop << 12;
1062 voice->position -= loop_len;
1063 this->r.g.wave_ended |= vbit;
1064 if ( !loop_len )
1065 {
1066 this->keys_down ^= vbit;
1067 raw_voice->envx = 0;
1068 voice->envx = 0;
1069 }
1070 }
1071 #endif
1072#if 0
1073 EXIT_TIMER(dsp_gen);
1074
1075 ENTER_TIMER(dsp_mix);
1076#endif
1077 chans_0 += amp_0;
1078 chans_1 += amp_1;
1079 #if !SPC_NOECHO
1080 if ( this->r.g.echo_ons & vbit )
1081 {
1082 echo_0 += amp_0;
1083 echo_1 += amp_1;
1084 }
1085 #endif
1086#if 0
1087 EXIT_TIMER(dsp_mix);
1088#endif
1089 }
1090 /* end of voice loop */
1091
1092 #if !SPC_NOECHO
Michael Sevakisd31162a2007-02-20 10:27:39 +00001093 #ifdef CPU_COLDFIRE
1094 /* Read feedback from echo buffer */
1095 int echo_pos = this->echo_pos;
Michael Sevakis46bb37a2007-02-20 13:06:11 +00001096 uint8_t* const echo_ptr = RAM + ((echo_start + echo_pos) & 0xFFFF);
1097 echo_pos += 4;
1098 if ( echo_pos >= echo_wrap )
1099 echo_pos = 0;
Michael Sevakisd31162a2007-02-20 10:27:39 +00001100 this->echo_pos = echo_pos;
1101 int fb = swap_odd_even32(*(int32_t *)echo_ptr);
1102 int out_0, out_1;
1103
1104 /* Keep last 8 samples */
1105 *this->last_fir_ptr = fb;
1106 this->last_fir_ptr = this->fir_ptr;
1107
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001108 /* Apply echo FIR filter to output samples read from echo buffer -
1109 circular buffer is hardware incremented and masked; FIR
1110 coefficients and buffer history are loaded in parallel with
1111 multiply accumulate operations. Shift left by one here and once
1112 again when calculating feedback to have sample values justified
1113 to bit 31 in the output to ease endian swap, interleaving and
1114 clamping before placing result in the program's echo buffer. */
Michael Sevakisd31162a2007-02-20 10:27:39 +00001115 int _0, _1, _2;
Michael Sevakisc2925812007-02-23 22:39:12 +00001116 asm volatile (
Michael Sevakisd31162a2007-02-20 10:27:39 +00001117 "move.l (%[fir_c]) , %[_2] \r\n"
1118 "mac.w %[fb]u, %[_2]u, <<, (%[fir_p])+&, %[_0], %%acc0 \r\n"
1119 "mac.w %[fb]l, %[_2]u, <<, (%[fir_p])& , %[_1], %%acc1 \r\n"
1120 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1121 "mac.w %[_0]l, %[_2]l, <<, 4(%[fir_c]) , %[_2], %%acc1 \r\n"
1122 "mac.w %[_1]u, %[_2]u, <<, 4(%[fir_p])& , %[_0], %%acc0 \r\n"
1123 "mac.w %[_1]l, %[_2]u, <<, 8(%[fir_p])& , %[_1], %%acc1 \r\n"
1124 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1125 "mac.w %[_0]l, %[_2]l, <<, 8(%[fir_c]) , %[_2], %%acc1 \r\n"
1126 "mac.w %[_1]u, %[_2]u, <<, 12(%[fir_p])& , %[_0], %%acc0 \r\n"
1127 "mac.w %[_1]l, %[_2]u, <<, 16(%[fir_p])& , %[_1], %%acc1 \r\n"
1128 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1129 "mac.w %[_0]l, %[_2]l, <<, 12(%[fir_c]) , %[_2], %%acc1 \r\n"
1130 "mac.w %[_1]u, %[_2]u, <<, 20(%[fir_p])& , %[_0], %%acc0 \r\n"
1131 "mac.w %[_1]l, %[_2]u, << , %%acc1 \r\n"
1132 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1133 "mac.w %[_0]l, %[_2]l, << , %%acc1 \r\n"
Michael Sevakisd31162a2007-02-20 10:27:39 +00001134 : [_0]"=&r"(_0), [_1]"=&r"(_1), [_2]"=&r"(_2),
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001135 [fir_p]"+a"(this->fir_ptr)
Michael Sevakisd31162a2007-02-20 10:27:39 +00001136 : [fir_c]"a"(this->fir_coeff), [fb]"r"(fb)
1137 );
1138
1139 /* Generate output */
Michael Sevakisc2925812007-02-23 22:39:12 +00001140 asm volatile (
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001141 /* fetch filter results to eliminate stalls */
1142 "movclr.l %%acc0, %[out_0] \r\n"
1143 "movclr.l %%acc1, %[out_1] \r\n"
1144 /* apply global volume */
Michael Sevakisd31162a2007-02-20 10:27:39 +00001145 "mac.l %[chans_0], %[gv_0] , %%acc2 \r\n"
1146 "mac.l %[chans_1], %[gv_1] , %%acc3 \r\n"
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001147 /* apply echo volume and add to final output */
Michael Sevakisd31162a2007-02-20 10:27:39 +00001148 "mac.l %[ev_0], %[out_0], >>, %%acc2 \r\n"
1149 "mac.l %[ev_1], %[out_1], >>, %%acc3 \r\n"
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001150 : [out_0]"=&r"(out_0), [out_1]"=&r"(out_1)
Michael Sevakisd31162a2007-02-20 10:27:39 +00001151 : [chans_0]"r"(chans_0), [gv_0]"r"(global_vol_0),
1152 [ev_0]"r"((int)this->r.g.echo_volume_0),
1153 [chans_1]"r"(chans_1), [gv_1]"r"(global_vol_1),
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001154 [ev_1]"r"((int)this->r.g.echo_volume_1)
Michael Sevakisd31162a2007-02-20 10:27:39 +00001155 );
1156
1157 /* Feedback into echo buffer */
1158 if ( !(this->r.g.flags & 0x20) )
1159 {
Michael Sevakisc2925812007-02-23 22:39:12 +00001160 asm volatile (
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001161 /* scale echo voices; saturate if overflow */
Michael Sevakisc2925812007-02-23 22:39:12 +00001162 "mac.l %[sh], %[e1] , %%acc1 \r\n"
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001163 "mac.l %[sh], %[e0] , %%acc0 \r\n"
1164 /* add scaled output from FIR filter */
Michael Sevakisd31162a2007-02-20 10:27:39 +00001165 "mac.l %[out_1], %[ef], <<, %%acc1 \r\n"
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001166 "mac.l %[out_0], %[ef], <<, %%acc0 \r\n"
1167 /* swap and fetch feedback results - simply
1168 swap_odd_even32 mixed in between macs and
1169 movclrs to mitigate stall issues */
1170 "move.l #0x00ff00ff, %[sh] \r\n"
Michael Sevakisd31162a2007-02-20 10:27:39 +00001171 "movclr.l %%acc1, %[e1] \r\n"
1172 "swap %[e1] \r\n"
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001173 "movclr.l %%acc0, %[e0] \r\n"
Michael Sevakisd31162a2007-02-20 10:27:39 +00001174 "move.w %[e1], %[e0] \r\n"
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001175 "and.l %[e0], %[sh] \r\n"
1176 "eor.l %[sh], %[e0] \r\n"
1177 "lsl.l #8, %[sh] \r\n"
1178 "lsr.l #8, %[e0] \r\n"
1179 "or.l %[sh], %[e0] \r\n"
1180 /* save final feedback into echo buffer */
1181 "move.l %[e0], (%[echo_ptr]) \r\n"
Michael Sevakisd31162a2007-02-20 10:27:39 +00001182 : [e0]"+&d"(echo_0), [e1]"+&d"(echo_1)
1183 : [out_0]"r"(out_0), [out_1]"r"(out_1),
1184 [ef]"r"((int)this->r.g.echo_feedback),
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001185 [echo_ptr]"a"((int32_t *)echo_ptr),
1186 [sh]"d"(1 << 9)
Michael Sevakisd31162a2007-02-20 10:27:39 +00001187 );
Michael Sevakisd31162a2007-02-20 10:27:39 +00001188 }
1189
1190 /* Output final samples */
Michael Sevakisc2925812007-02-23 22:39:12 +00001191 asm volatile (
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001192 /* fetch output saved in %acc2 and %acc3 */
Michael Sevakisd31162a2007-02-20 10:27:39 +00001193 "movclr.l %%acc2, %[out_0] \r\n"
1194 "movclr.l %%acc3, %[out_1] \r\n"
Michael Sevakisf2b6ecd2007-03-03 01:19:35 +00001195 /* scale right by global_muting shift */
Michael Sevakisd31162a2007-02-20 10:27:39 +00001196 "asr.l %[gm], %[out_0] \r\n"
1197 "asr.l %[gm], %[out_1] \r\n"
1198 : [out_0]"=&d"(out_0), [out_1]"=&d"(out_1)
1199 : [gm]"d"(global_muting)
1200 );
1201
1202 out_buf [ 0] = out_0;
1203 out_buf [WAV_CHUNK_SIZE] = out_1;
1204 out_buf ++;
1205 #else /* !CPU_COLDFIRE */
Adam Gashlinb73960d2007-02-14 03:34:55 +00001206 /* Read feedback from echo buffer */
1207 int echo_pos = this->echo_pos;
1208 uint8_t* const echo_ptr = RAM +
1209 ((this->r.g.echo_page * 0x100 + echo_pos) & 0xFFFF);
1210 echo_pos += 4;
1211 if ( echo_pos >= (this->r.g.echo_delay & 15) * 0x800 )
1212 echo_pos = 0;
1213 this->echo_pos = echo_pos;
1214 int fb_0 = GET_LE16SA( echo_ptr );
1215 int fb_1 = GET_LE16SA( echo_ptr + 2 );
1216
1217 /* Keep last 8 samples */
1218 int (* const fir_ptr) [2] = this->fir_buf + this->fir_pos;
1219 this->fir_pos = (this->fir_pos + 1) & (fir_buf_half - 1);
1220 fir_ptr [ 0] [0] = fb_0;
1221 fir_ptr [ 0] [1] = fb_1;
1222 /* duplicate at +8 eliminates wrap checking below */
1223 fir_ptr [fir_buf_half] [0] = fb_0;
1224 fir_ptr [fir_buf_half] [1] = fb_1;
1225
1226 /* Apply FIR */
1227 fb_0 *= this->fir_coeff [0];
1228 fb_1 *= this->fir_coeff [0];
1229
1230 #define DO_PT( i )\
1231 fb_0 += fir_ptr [i] [0] * this->fir_coeff [i];\
1232 fb_1 += fir_ptr [i] [1] * this->fir_coeff [i];
1233
1234 DO_PT( 1 )
1235 DO_PT( 2 )
1236 DO_PT( 3 )
1237 DO_PT( 4 )
1238 DO_PT( 5 )
1239 DO_PT( 6 )
1240 DO_PT( 7 )
1241
1242 /* Generate output */
1243 int amp_0 = (chans_0 * global_vol_0 + fb_0 * this->r.g.echo_volume_0)
1244 >> global_muting;
1245 int amp_1 = (chans_1 * global_vol_1 + fb_1 * this->r.g.echo_volume_1)
1246 >> global_muting;
Michael Sevakisd31162a2007-02-20 10:27:39 +00001247 out_buf [ 0] = amp_0;
1248 out_buf [WAV_CHUNK_SIZE] = amp_1;
Adam Gashlinb73960d2007-02-14 03:34:55 +00001249 out_buf ++;
1250
1251 /* Feedback into echo buffer */
1252 int e0 = (echo_0 >> 7) + ((fb_0 * this->r.g.echo_feedback) >> 14);
1253 int e1 = (echo_1 >> 7) + ((fb_1 * this->r.g.echo_feedback) >> 14);
1254 if ( !(this->r.g.flags & 0x20) )
1255 {
1256 CLAMP16( e0, e0 );
1257 SET_LE16A( echo_ptr , e0 );
1258 CLAMP16( e1, e1 );
1259 SET_LE16A( echo_ptr + 2, e1 );
1260 }
Michael Sevakisd31162a2007-02-20 10:27:39 +00001261 #endif /* CPU_COLDFIRE */
Adam Gashlinb73960d2007-02-14 03:34:55 +00001262 #else
Michael Sevakisd31162a2007-02-20 10:27:39 +00001263 /* Generate output */
Adam Gashlinb73960d2007-02-14 03:34:55 +00001264 int amp_0 = (chans_0 * global_vol_0) >> global_muting;
1265 int amp_1 = (chans_1 * global_vol_1) >> global_muting;
Michael Sevakisd31162a2007-02-20 10:27:39 +00001266 out_buf [ 0] = amp_0;
1267 out_buf [WAV_CHUNK_SIZE] = amp_1;
Adam Gashlinb73960d2007-02-14 03:34:55 +00001268 out_buf ++;
1269 #endif
1270 }
1271 while ( --count );
1272#if 0
1273 EXIT_TIMER(dsp);
1274 ENTER_TIMER(cpu);
1275#endif
1276}
1277
1278static inline void DSP_run( struct Spc_Dsp* this, long count, int32_t* out )
1279{
1280 /* Should we just fill the buffer with silence? Flags won't be cleared */
1281 /* during this run so it seems it should keep resetting every sample. */
1282 if ( this->r.g.flags & 0x80 )
1283 DSP_reset( this );
1284
1285 DSP_run_( this, count, out );
1286}