Patch #1203309 by David Bryant - WavPack codec fix and performance enhancement


git-svn-id: svn://svn.rockbox.org/rockbox/trunk@6481 a1c6a512-1295-4272-9138-f99709370657
diff --git a/apps/codecs/libwavpack/bits.c b/apps/codecs/libwavpack/bits.c
index e1700df..1fe6aac 100644
--- a/apps/codecs/libwavpack/bits.c
+++ b/apps/codecs/libwavpack/bits.c
@@ -17,28 +17,6 @@
 #include "wavpack.h"
 
 #include <string.h>
-#include <ctype.h>
-
-/* dirty fix */
-const char _ctype_[257]={
-	0,
-        _C,	_C,	_C,	_C,	_C,	_C,	_C,	_C,
-        _C,	_C|_S,	_C|_S,	_C|_S,	_C|_S,	_C|_S,	_C,	_C,
-        _C,	_C,	_C,	_C,	_C,	_C,	_C,	_C,
-        _C,	_C,	_C,	_C,	_C,	_C,	_C,	_C,
-        _S|_B,	_P,	_P,	_P,	_P,	_P,	_P,	_P,
-        _P,	_P,	_P,	_P,	_P,	_P,	_P,	_P,
-        _N,	_N,	_N,	_N,	_N,	_N,	_N,	_N,
-        _N,	_N,	_P,	_P,	_P,	_P,	_P,	_P,
-        _P,	_U|_X,	_U|_X,	_U|_X,	_U|_X,	_U|_X,	_U|_X,	_U,
-        _U,	_U,	_U,	_U,	_U,	_U,	_U,	_U,
-        _U,	_U,	_U,	_U,	_U,	_U,	_U,	_U,
-        _U,	_U,	_U,	_P,	_P,	_P,	_P,	_P,
-        _P,	_L|_X,	_L|_X,	_L|_X,	_L|_X,	_L|_X,	_L|_X,	_L,
-        _L,	_L,	_L,	_L,	_L,	_L,	_L,	_L,
-        _L,	_L,	_L,	_L,	_L,	_L,	_L,	_L,
-        _L,	_L,	_L,	_P,	_P,	_P,	_P,	_C
-};
 
 ////////////////////////// Bitstream functions ////////////////////////////////
 
@@ -118,7 +96,7 @@
 		break;
 
 	    default:
-		if (isdigit (*format))
+		if (*format >= '0' && *format <= '9')
 		    cp += *format - '0';
 
 		break;
@@ -150,7 +128,7 @@
 		break;
 
 	    default:
-		if (isdigit (*format))
+		if (*format >= '0' && *format <= '9')
 		    cp += *format - '0';
 
 		break;
diff --git a/apps/codecs/libwavpack/unpack.c b/apps/codecs/libwavpack/unpack.c
index cc31b88..ae47378 100644
--- a/apps/codecs/libwavpack/unpack.c
+++ b/apps/codecs/libwavpack/unpack.c
@@ -18,20 +18,34 @@
 #include <string.h>
 #include <math.h>
 
+static void strcpy_loc (char *dst, char *src) { while (*src) *dst++ = *src++; *dst = 0; }  // local string copy: copies src up to and including its terminating 0 into dst; no bounds check on dst
+
 #define LOSSY_MUTE
 
 //////////////////////////////// local macros /////////////////////////////////
 
+// these macros implement the weight application and update operations
+// that are at the heart of the decorrelation loops
+
 #define apply_weight_i(weight, sample) ((weight * sample + 512) >> 10)
 
 #define apply_weight_f(weight, sample) (((((sample & 0xffff) * weight) >> 9) + \
     (((sample & ~0xffff) >> 9) * weight) + 1) >> 1)
 
+#if 1	// PERFCOND
 #define apply_weight(weight, sample) (sample != (short) sample ? \
     apply_weight_f (weight, sample) : apply_weight_i (weight, sample))
+#else
+#define apply_weight(weight, sample) ((int32_t)((weight * (int64_t) sample + 512) >> 10))
+#endif
 
+#if 1	// PERFCOND
 #define update_weight(weight, delta, source, result) \
     if (source && result) weight -= ((((source ^ result) >> 30) & 2) - 1) * delta;
+#else
+#define update_weight(weight, delta, source, result) \
+    if (source && result) (source ^ result) < 0 ? (weight -= delta) : (weight += delta);
+#endif
 
 #define update_weight_clip(weight, delta, source, result) \
     if (source && result && ((source ^ result) < 0 ? (weight -= delta) < -1024 : (weight += delta) > 1024)) \
@@ -61,7 +75,7 @@
 
     while (read_metadata_buff (wpc, &wpmd)) {
 	if (!process_metadata (wpc, &wpmd)) {
-	    /*strcpy (wpc->error_message, "invalid metadata!");*/
+	    strcpy_loc (wpc->error_message, "invalid metadata!");
 	    return FALSE;
 	}
 
@@ -70,7 +84,7 @@
     }
 
     if (wps->wphdr.block_samples && !bs_is_open (&wps->wvbits)) {
-	/*strcpy (wpc->error_message, "invalid WavPack file!");*/
+	strcpy_loc (wpc->error_message, "invalid WavPack file!");
 	return FALSE;
     }
 
@@ -301,6 +315,9 @@
 // samples unpacked, which can be less than the number requested if an error
 // occurs or the end of the block is reached.
 
+static void decorr_mono_pass (struct decorr_pass *dpp, long *buffer, long sample_count);
+static void decorr_stereo_pass (struct decorr_pass *dpp, long *buffer, long sample_count);
+static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long sample_count);
 static void fixup_samples (WavpackStream *wps, long *buffer, ulong sample_count);
 
 long unpack_samples (WavpackContext *wpc, long *buffer, ulong sample_count)
@@ -309,8 +326,8 @@
     ulong flags = wps->wphdr.flags, crc = wps->crc, i;
     long mute_limit = (1L << ((flags & MAG_MASK) >> MAG_LSB)) + 2;
     struct decorr_pass *dpp;
-    long read_word, *bptr;
-    int tcount, m = 0;
+    long *bptr, *eptr;
+    int tcount;
 
     if (wps->sample_index + sample_count > wps->wphdr.block_index + wps->wphdr.block_samples)
 	sample_count = wps->wphdr.block_index + wps->wphdr.block_samples - wps->sample_index;
@@ -326,121 +343,59 @@
 
     ///////////////////// handle version 4 mono data /////////////////////////
 
-    if (flags & MONO_FLAG)
-	for (bptr = buffer, i = 0; i < sample_count; ++i) {
-	    if ((read_word = get_word (wps, 0)) == WORD_EOF)
+    if (flags & MONO_FLAG) {
+	eptr = buffer + sample_count;
+	i = get_words (wps, 1, sample_count, buffer);
+
+	for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++)
+	    decorr_mono_pass (dpp, buffer, sample_count);
+
+	for (bptr = buffer; bptr < eptr; ++bptr) {
+	    if (labs (bptr [0]) > mute_limit) {
+		i = bptr - buffer;
 		break;
-
-	    for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) {
-		long sam, temp;
-		int k;
-
-		if (dpp->term > MAX_TERM) {
-		    if (dpp->term & 1)
-			sam = 2 * dpp->samples_A [0] - dpp->samples_A [1];
-		    else
-			sam = (3 * dpp->samples_A [0] - dpp->samples_A [1]) >> 1;
-
-		    dpp->samples_A [1] = dpp->samples_A [0];
-		    k = 0;
-		}
-		else {
-		    sam = dpp->samples_A [m];
-		    k = (m + dpp->term) & (MAX_TERM - 1);
-		}
-
-		temp = apply_weight (dpp->weight_A, sam) + read_word;
-		update_weight (dpp->weight_A, dpp->delta, sam, read_word);
-		dpp->samples_A [k] = read_word = temp;
 	    }
 
-	    if (labs (read_word) > mute_limit)
-		break;
-
-	    m = (m + 1) & (MAX_TERM - 1);
-	    crc = crc * 3 + read_word;
-	    *bptr++ = read_word;
+	    crc = crc * 3 + bptr [0];
 	}
+    }
 
     //////////////////// handle version 4 stereo data ////////////////////////
 
-    else
-	for (bptr = buffer, i = 0; i < sample_count; ++i) {
-	    long left, right, left2, right2;
+    else {
+	eptr = buffer + (sample_count * 2);
+	i = get_words (wps, 2, sample_count, buffer);
 
-	    if ((left = get_word (wps, 0)) == WORD_EOF ||
-		(right = get_word (wps, 1)) == WORD_EOF)
-		    break;
-
+	if (sample_count < 16)
 	    for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++)
-		if (dpp->term > 0) {
-		    long sam_A, sam_B;
-		    int k;
+		decorr_stereo_pass (dpp, buffer, sample_count);
+	else
+	    for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) {
+		decorr_stereo_pass (dpp, buffer, 8);
+		decorr_stereo_pass_cont (dpp, buffer + 16, sample_count - 8);
+	    }
 
-		    if (dpp->term > MAX_TERM) {
-			if (dpp->term & 1) {
-			    sam_A = 2 * dpp->samples_A [0] - dpp->samples_A [1];
-			    sam_B = 2 * dpp->samples_B [0] - dpp->samples_B [1];
-			}
-			else {
-			    sam_A = (3 * dpp->samples_A [0] - dpp->samples_A [1]) >> 1;
-			    sam_B = (3 * dpp->samples_B [0] - dpp->samples_B [1]) >> 1;
-			}
+	if (flags & JOINT_STEREO)
+	    for (bptr = buffer; bptr < eptr; bptr += 2) {
+		bptr [0] += (bptr [1] -= (bptr [0] >> 1));
 
-			dpp->samples_A [1] = dpp->samples_A [0];
-			dpp->samples_B [1] = dpp->samples_B [0];
-			k = 0;
-		    }
-		    else {
-			sam_A = dpp->samples_A [m];
-			sam_B = dpp->samples_B [m];
-			k = (m + dpp->term) & (MAX_TERM - 1);
-		    }
-
-		    left2 = apply_weight (dpp->weight_A, sam_A) + left;
-		    right2 = apply_weight (dpp->weight_B, sam_B) + right;
-
-		    update_weight (dpp->weight_A, dpp->delta, sam_A, left);
-		    update_weight (dpp->weight_B, dpp->delta, sam_B, right);
-
-		    dpp->samples_A [k] = left = left2;
-		    dpp->samples_B [k] = right = right2;
-		}
-		else if (dpp->term == -1) {
-		    left2 = left + apply_weight (dpp->weight_A, dpp->samples_A [0]);
-		    update_weight_clip (dpp->weight_A, dpp->delta, dpp->samples_A [0], left);
-		    left = left2;
-		    right2 = right + apply_weight (dpp->weight_B, left2);
-		    update_weight_clip (dpp->weight_B, dpp->delta, left2, right);
-		    dpp->samples_A [0] = right = right2;
-		}
-		else {
-		    right2 = right + apply_weight (dpp->weight_B, dpp->samples_B [0]);
-		    update_weight_clip (dpp->weight_B, dpp->delta, dpp->samples_B [0], right);
-		    right = right2;
-
-		    if (dpp->term == -3) {
-			right2 = dpp->samples_A [0];
-			dpp->samples_A [0] = right;
-		    }
-
-		    left2 = left + apply_weight (dpp->weight_A, right2);
-		    update_weight_clip (dpp->weight_A, dpp->delta, right2, left);
-		    dpp->samples_B [0] = left = left2;
+		if (labs (bptr [0]) > mute_limit || labs (bptr [1]) > mute_limit) {
+		    i = (bptr - buffer) / 2;
+		    break;
 		}
 
-	    m = (m + 1) & (MAX_TERM - 1);
+		crc = (crc * 3 + bptr [0]) * 3 + bptr [1];
+	    }
+	else
+	    for (bptr = buffer; bptr < eptr; bptr += 2) {
+		if (labs (bptr [0]) > mute_limit || labs (bptr [1]) > mute_limit) {
+		    i = (bptr - buffer) / 2;
+		    break;
+		}
 
-	    if (flags & JOINT_STEREO)
-		left += (right -= (left >> 1));
-
-	    if (labs (left) > mute_limit || labs (right) > mute_limit)
-		break;
-
-	    crc = (crc * 3 + left) * 3 + right;
-	    *bptr++ = left;
-	    *bptr++ = right;
-	}
+		crc = (crc * 3 + bptr [0]) * 3 + bptr [1];
+	    }
+    }
 
     if (i != sample_count) {
 	memset (buffer, 0, sample_count * (flags & MONO_FLAG ? 4 : 8));
@@ -448,17 +403,6 @@
 	i = sample_count;
     }
 
-    while (m--)
-	for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++)
-	    if (dpp->term > 0 && dpp->term <= MAX_TERM) {
-		long temp = dpp->samples_A [0];
-		memcpy (dpp->samples_A, dpp->samples_A + 1, sizeof (dpp->samples_A) - sizeof (dpp->samples_A [0]));
-		dpp->samples_A [MAX_TERM - 1] = temp;
-		temp = dpp->samples_B [0];
-		memcpy (dpp->samples_B, dpp->samples_B + 1, sizeof (dpp->samples_B) - sizeof (dpp->samples_B [0]));
-		dpp->samples_B [MAX_TERM - 1] = temp;
-	    }
-
     fixup_samples (wps, buffer, i);
 
     if (flags & FLOAT_DATA)
@@ -471,6 +415,286 @@
     return i;
 }
 
+static void decorr_stereo_pass (struct decorr_pass *dpp, long *buffer, long sample_count)  // applies one decorrelation pass in place to sample_count pairs (bptr [0] = channel A, bptr [1] = channel B), reading and updating the history and weights held in dpp
+{
+    long delta = dpp->delta, weight_A = dpp->weight_A, weight_B = dpp->weight_B;  // local copies; the weights are written back to dpp at the end
+    long *bptr, *eptr = buffer + (sample_count * 2), sam_A, sam_B;  // eptr is one past the last pair
+    int m, k;
+
+    switch (dpp->term) {
+
+	case 17:  // predictor: 2 * samples [0] - samples [1]
+	    for (bptr = buffer; bptr < eptr; bptr += 2) {
+		sam_A = 2 * dpp->samples_A [0] - dpp->samples_A [1];
+		dpp->samples_A [1] = dpp->samples_A [0];  // shift history: newest value becomes the previous one
+		dpp->samples_A [0] = apply_weight (weight_A, sam_A) + bptr [0];  // weighted prediction plus the value read from the buffer
+		update_weight (weight_A, delta, sam_A, bptr [0]);  // must run before bptr [0] is overwritten on the next line
+		bptr [0] = dpp->samples_A [0];  // store the result in place
+
+		sam_A = 2 * dpp->samples_B [0] - dpp->samples_B [1];  // sam_A is reused as scratch for channel B
+		dpp->samples_B [1] = dpp->samples_B [0];
+		dpp->samples_B [0] = apply_weight (weight_B, sam_A) + bptr [1];
+		update_weight (weight_B, delta, sam_A, bptr [1]);
+		bptr [1] = dpp->samples_B [0];
+	    }
+
+	    break;
+
+	case 18:  // predictor: (3 * samples [0] - samples [1]) >> 1
+	    for (bptr = buffer; bptr < eptr; bptr += 2) {
+		sam_A = (3 * dpp->samples_A [0] - dpp->samples_A [1]) >> 1;
+		dpp->samples_A [1] = dpp->samples_A [0];
+		dpp->samples_A [0] = apply_weight (weight_A, sam_A) + bptr [0];
+		update_weight (weight_A, delta, sam_A, bptr [0]);
+		bptr [0] = dpp->samples_A [0];
+
+		sam_A = (3 * dpp->samples_B [0] - dpp->samples_B [1]) >> 1;  // sam_A is reused as scratch for channel B
+		dpp->samples_B [1] = dpp->samples_B [0];
+		dpp->samples_B [0] = apply_weight (weight_B, sam_A) + bptr [1];
+		update_weight (weight_B, delta, sam_A, bptr [1]);
+		bptr [1] = dpp->samples_B [0];
+	    }
+
+	    break;
+
+	default:  // every term without its own case label; the negative cases listed further down are still matched by their labels
+	    for (m = 0, k = dpp->term & (MAX_TERM - 1), bptr = buffer; bptr < eptr; bptr += 2) {  // m = history slot read, k = slot written (term slots ahead); both wrap via the MAX_TERM - 1 mask
+		sam_A = dpp->samples_A [m];
+		dpp->samples_A [k] = apply_weight (weight_A, sam_A) + bptr [0];
+		update_weight (weight_A, delta, sam_A, bptr [0]);
+		bptr [0] = dpp->samples_A [k];
+
+		sam_A = dpp->samples_B [m];  // sam_A is reused as scratch for channel B
+		dpp->samples_B [k] = apply_weight (weight_B, sam_A) + bptr [1];
+		update_weight (weight_B, delta, sam_A, bptr [1]);
+		bptr [1] = dpp->samples_B [k];
+
+		m = (m + 1) & (MAX_TERM - 1);
+		k = (k + 1) & (MAX_TERM - 1);
+	    }
+
+	    if (m) {  // loop stopped with a non-zero read slot: rotate the history so slot m moves to slot 0, where the loop above starts reading
+		long temp_samples [MAX_TERM];
+
+		memcpy (temp_samples, dpp->samples_A, sizeof (dpp->samples_A));
+
+		for (k = 0; k < MAX_TERM; k++, m++)
+		    dpp->samples_A [k] = temp_samples [m & (MAX_TERM - 1)];
+
+		memcpy (temp_samples, dpp->samples_B, sizeof (dpp->samples_B));
+
+		for (k = 0; k < MAX_TERM; k++, m++)  // m kept counting through the previous loop; the mask maps it back to the same start slot -- assumes MAX_TERM is a power of two, TODO confirm
+		    dpp->samples_B [k] = temp_samples [m & (MAX_TERM - 1)];
+	    }
+
+	    break;
+
+	case -1:  // channel A is predicted from samples_A [0], then channel B from the new channel-A result
+	    for (bptr = buffer; bptr < eptr; bptr += 2) {
+		sam_A = bptr [0] + apply_weight (weight_A, dpp->samples_A [0]);
+		update_weight_clip (weight_A, delta, dpp->samples_A [0], bptr [0]);
+		bptr [0] = sam_A;
+		dpp->samples_A [0] = bptr [1] + apply_weight (weight_B, sam_A);  // new channel-B result becomes the stored value for the next iteration
+		update_weight_clip (weight_B, delta, sam_A, bptr [1]);
+		bptr [1] = dpp->samples_A [0];
+	    }
+
+	    break;
+
+	case -2:  // mirror of -1: channel B is predicted from samples_B [0], then channel A from the new channel-B result
+	    for (bptr = buffer; bptr < eptr; bptr += 2) {
+		sam_B = bptr [1] + apply_weight (weight_B, dpp->samples_B [0]);
+		update_weight_clip (weight_B, delta, dpp->samples_B [0], bptr [1]);
+		bptr [1] = sam_B;
+		dpp->samples_B [0] = bptr [0] + apply_weight (weight_A, sam_B);  // new channel-A result becomes the stored value for the next iteration
+		update_weight_clip (weight_A, delta, sam_B, bptr [0]);
+		bptr [0] = dpp->samples_B [0];
+	    }
+
+	    break;
+
+	case -3:  // each channel is predicted from its stored value; the results are then stored crosswise
+	    for (bptr = buffer; bptr < eptr; bptr += 2) {
+		sam_A = bptr [0] + apply_weight (weight_A, dpp->samples_A [0]);
+		update_weight_clip (weight_A, delta, dpp->samples_A [0], bptr [0]);
+		sam_B = bptr [1] + apply_weight (weight_B, dpp->samples_B [0]);
+		update_weight_clip (weight_B, delta, dpp->samples_B [0], bptr [1]);
+		bptr [0] = dpp->samples_B [0] = sam_A;  // channel-A result goes to samples_B [0]
+		bptr [1] = dpp->samples_A [0] = sam_B;  // channel-B result goes to samples_A [0]
+	    }
+
+	    break;
+    }
+
+    dpp->weight_A = weight_A;  // persist the adapted weights
+    dpp->weight_B = weight_B;
+}
+
+static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long sample_count)  // continuation of decorr_stereo_pass: history is read from the samples stored just before buffer (negative indices) instead of dpp->samples_*; the only caller visible here passes buffer + 16 after running decorr_stereo_pass on the first 8 pairs
+{
+    long delta = dpp->delta, weight_A = dpp->weight_A, weight_B = dpp->weight_B;  // local copies; the weights are written back to dpp at the end
+    long *bptr, *tptr, *eptr = buffer + (sample_count * 2), sam_A, sam_B;  // eptr is one past the last pair
+    int k;
+
+    switch (dpp->term) {
+
+	case 17:  // predictor: 2 * newest - previous, taken from the buffer itself
+	    for (bptr = buffer; bptr < eptr; bptr += 2) {
+		sam_A = 2 * bptr [-2] - bptr [-4];  // the two preceding channel-A results
+		bptr [0] = apply_weight (weight_A, sam_A) + (sam_B = bptr [0]);  // sam_B keeps the value read from the buffer before it is overwritten
+		update_weight (weight_A, delta, sam_A, sam_B);
+
+		sam_A = 2 * bptr [-1] - bptr [-3];  // the two preceding channel-B results
+		bptr [1] = apply_weight (weight_B, sam_A) + (sam_B = bptr [1]);
+		update_weight (weight_B, delta, sam_A, sam_B);
+	    }
+
+	    dpp->samples_B [0] = bptr [-1];  // save the last two results of each channel into dpp's history fields
+	    dpp->samples_A [0] = bptr [-2];
+	    dpp->samples_B [1] = bptr [-3];
+	    dpp->samples_A [1] = bptr [-4];
+	    break;
+
+	case 18:  // predictor: (3 * newest - previous) >> 1, taken from the buffer itself
+	    for (bptr = buffer; bptr < eptr; bptr += 2) {
+		sam_A = (3 * bptr [-2] - bptr [-4]) >> 1;
+		bptr [0] = apply_weight (weight_A, sam_A) + (sam_B = bptr [0]);
+		update_weight (weight_A, delta, sam_A, sam_B);
+
+		sam_A = (3 * bptr [-1] - bptr [-3]) >> 1;
+		bptr [1] = apply_weight (weight_B, sam_A) + (sam_B = bptr [1]);
+		update_weight (weight_B, delta, sam_A, sam_B);
+	    }
+
+	    dpp->samples_B [0] = bptr [-1];  // save the last two results of each channel into dpp's history fields
+	    dpp->samples_A [0] = bptr [-2];
+	    dpp->samples_B [1] = bptr [-3];
+	    dpp->samples_A [1] = bptr [-4];
+	    break;
+
+	default:  // every term without its own case label; the negative cases listed further down are still matched by their labels
+	    for (bptr = buffer, tptr = buffer - (dpp->term * 2); bptr < eptr; bptr += 2, tptr += 2) {  // tptr trails bptr by term pairs
+		bptr [0] = apply_weight (weight_A, tptr [0]) + (sam_A = bptr [0]);  // sam_A keeps the value read from the buffer before it is overwritten
+		update_weight (weight_A, delta, tptr [0], sam_A);
+
+		bptr [1] = apply_weight (weight_B, tptr [1]) + (sam_A = bptr [1]);
+		update_weight (weight_B, delta, tptr [1], sam_A);
+	    }
+
+	    k = dpp->term;  // k counts down from term: slot (term - 1) receives the newest pair, and so on for the last eight pairs, wrapping via the mask
+	    dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-1];
+	    dpp->samples_A [  k & (MAX_TERM - 1)] = bptr [-2];
+	    dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-3];
+	    dpp->samples_A [  k & (MAX_TERM - 1)] = bptr [-4];
+	    dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-5];
+	    dpp->samples_A [  k & (MAX_TERM - 1)] = bptr [-6];
+	    dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-7];
+	    dpp->samples_A [  k & (MAX_TERM - 1)] = bptr [-8];
+	    dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-9];
+	    dpp->samples_A [  k & (MAX_TERM - 1)] = bptr [-10];
+	    dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-11];
+	    dpp->samples_A [  k & (MAX_TERM - 1)] = bptr [-12];
+	    dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-13];
+	    dpp->samples_A [  k & (MAX_TERM - 1)] = bptr [-14];
+	    dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-15];
+	    dpp->samples_A [  k & (MAX_TERM - 1)] = bptr [-16];
+	    break;
+
+	case -1:  // channel A is predicted from the preceding channel-B result, then channel B from the new channel-A result
+	    for (bptr = buffer; bptr < eptr; bptr += 2) {
+		bptr [0] = apply_weight (weight_A, bptr [-1]) + (sam_A = bptr [0]);  // bptr [-1] is the preceding channel-B result
+		update_weight_clip (weight_A, delta, bptr [-1], sam_A);
+		bptr [1] = apply_weight (weight_B, bptr [0]) + (sam_A = bptr [1]);  // bptr [0] was just updated above
+		update_weight_clip (weight_B, delta, bptr [0], sam_A);
+	    }
+
+	    dpp->samples_A [0] = bptr [-1];  // last channel-B result, the value decorr_stereo_pass keeps in samples_A [0] for this term
+	    break;
+
+	case -2:  // channel B is predicted from the preceding channel-A result, then channel A from the new channel-B result
+	    for (bptr = buffer; bptr < eptr; bptr += 2) {
+		bptr [1] = apply_weight (weight_B, bptr [-2]) + (sam_A = bptr [1]);  // bptr [-2] is the preceding channel-A result
+		update_weight_clip (weight_B, delta, bptr [-2], sam_A);
+		bptr [0] = apply_weight (weight_A, bptr [1]) + (sam_A = bptr [0]);  // bptr [1] was just updated above
+		update_weight_clip (weight_A, delta, bptr [1], sam_A);
+	    }
+
+	    dpp->samples_B [0] = bptr [-2];  // last channel-A result, the value decorr_stereo_pass keeps in samples_B [0] for this term
+	    break;
+
+	case -3:  // each channel is predicted from the other channel's preceding result
+	    for (bptr = buffer; bptr < eptr; bptr += 2) {
+		bptr [0] = apply_weight (weight_A, bptr [-1]) + (sam_A = bptr [0]);  // channel A from the preceding channel-B result
+		update_weight_clip (weight_A, delta, bptr [-1], sam_A);
+		bptr [1] = apply_weight (weight_B, bptr [-2]) + (sam_A = bptr [1]);  // channel B from the preceding channel-A result (bptr [-2], not the bptr [0] just written)
+		update_weight_clip (weight_B, delta, bptr [-2], sam_A);
+	    }
+
+	    dpp->samples_A [0] = bptr [-1];  // crosswise storage, matching decorr_stereo_pass for this term
+	    dpp->samples_B [0] = bptr [-2];
+	    break;
+    }
+
+    dpp->weight_A = weight_A;  // persist the adapted weights
+    dpp->weight_B = weight_B;
+}
+
+static void decorr_mono_pass (struct decorr_pass *dpp, long *buffer, long sample_count)  // applies one decorrelation pass in place to sample_count single-channel samples, reading and updating dpp->samples_A and dpp->weight_A
+{
+    long delta = dpp->delta, weight_A = dpp->weight_A;  // local copies; the weight is written back to dpp at the end
+    long *bptr, *eptr = buffer + sample_count, sam_A;  // eptr is one past the last sample
+    int m, k;
+
+    switch (dpp->term) {
+
+	case 17:  // predictor: 2 * samples [0] - samples [1]
+	    for (bptr = buffer; bptr < eptr; bptr++) {
+		sam_A = 2 * dpp->samples_A [0] - dpp->samples_A [1];
+		dpp->samples_A [1] = dpp->samples_A [0];  // shift history: newest value becomes the previous one
+		dpp->samples_A [0] = apply_weight (weight_A, sam_A) + bptr [0];  // weighted prediction plus the value read from the buffer
+		update_weight (weight_A, delta, sam_A, bptr [0]);  // must run before bptr [0] is overwritten on the next line
+		bptr [0] = dpp->samples_A [0];  // store the result in place
+	    }
+
+	    break;
+
+	case 18:  // predictor: (3 * samples [0] - samples [1]) >> 1
+	    for (bptr = buffer; bptr < eptr; bptr++) {
+		sam_A = (3 * dpp->samples_A [0] - dpp->samples_A [1]) >> 1;
+		dpp->samples_A [1] = dpp->samples_A [0];
+		dpp->samples_A [0] = apply_weight (weight_A, sam_A) + bptr [0];
+		update_weight (weight_A, delta, sam_A, bptr [0]);
+		bptr [0] = dpp->samples_A [0];
+	    }
+
+	    break;
+
+	default:  // every term other than 17 and 18
+	    for (m = 0, k = dpp->term & (MAX_TERM - 1), bptr = buffer; bptr < eptr; bptr++) {  // m = history slot read, k = slot written (term slots ahead); both wrap via the MAX_TERM - 1 mask
+		sam_A = dpp->samples_A [m];
+		dpp->samples_A [k] = apply_weight (weight_A, sam_A) + bptr [0];
+		update_weight (weight_A, delta, sam_A, bptr [0]);
+		bptr [0] = dpp->samples_A [k];
+		m = (m + 1) & (MAX_TERM - 1);
+		k = (k + 1) & (MAX_TERM - 1);
+	    }
+
+	    if (m) {  // loop stopped with a non-zero read slot: rotate the history so slot m moves to slot 0, where the loop above starts reading
+		long temp_samples [MAX_TERM];
+
+		memcpy (temp_samples, dpp->samples_A, sizeof (dpp->samples_A));
+
+		for (k = 0; k < MAX_TERM; k++, m++)
+		    dpp->samples_A [k] = temp_samples [m & (MAX_TERM - 1)];
+	    }
+
+	    break;
+    }
+
+    dpp->weight_A = weight_A;  // persist the adapted weight
+}
+
+
 // This is a helper function for unpack_samples() that applies several final
 // operations. First, if the data is 32-bit float data, then that conversion
 // is done in the float.c module (whether lossy or lossless) and we return.
@@ -513,7 +737,6 @@
 
     if (flags & HYBRID_FLAG) {
 	long min_value, max_value, min_shifted, max_shifted;
-        min_value = max_value = min_shifted = max_shifted = 0;
 
 	switch (flags & BYTES_STORED) {
 	    case 0:
@@ -532,9 +755,9 @@
 		break;
 
 	    case 3:
-                // 0x80000000 is the same as 2147483648
-                min_shifted = (min_value = -0x80000000 >> shift) << shift;
-                max_shifted = (max_value = 0x80000000 >> shift) << shift;
+	    default:
+                min_shifted = (min_value = (long) 0x80000000 >> shift) << shift;
+                max_shifted = (max_value = (long) 0x7FFFFFFF >> shift) << shift;
 		break;
 	}
 
diff --git a/apps/codecs/libwavpack/wavpack.h b/apps/codecs/libwavpack/wavpack.h
index 6f74d95..06f86cb 100644
--- a/apps/codecs/libwavpack/wavpack.h
+++ b/apps/codecs/libwavpack/wavpack.h
@@ -291,7 +291,7 @@
 
 int read_entropy_vars (WavpackStream *wps, WavpackMetadata *wpmd);
 int read_hybrid_profile (WavpackStream *wps, WavpackMetadata *wpmd);
-long get_word (WavpackStream *wps, int chan);
+long get_words (WavpackStream *wps, int nchans, int nsamples, long *buffer);
 long exp2s (int log);
 int restore_weight (char weight);
 
diff --git a/apps/codecs/libwavpack/words.c b/apps/codecs/libwavpack/words.c
index 35061b6..370c0d4 100644
--- a/apps/codecs/libwavpack/words.c
+++ b/apps/codecs/libwavpack/words.c
@@ -123,9 +123,20 @@
     0xea, 0xec, 0xed, 0xee, 0xf0, 0xf1, 0xf2, 0xf4, 0xf5, 0xf6, 0xf8, 0xf9, 0xfa, 0xfc, 0xfd, 0xff
 };
 
+static const char ones_count_table [] = {  // indexed by a byte value: number of consecutive 1 bits starting at bit 0 (e.g. entry 3 is 2, entry 255 is 8)
+    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
+    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,
+    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
+    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7,
+    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
+    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,
+    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
+    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,8
+};
+
 ///////////////////////////// executable code ////////////////////////////////
 
-static int log2 (unsigned long avalue);
+static int mylog2 (unsigned long avalue);
 
 // Read the median log2 values from the specifed metadata structure, convert
 // them back to 32-bit unsigned values and store them. If length is not
@@ -270,140 +281,167 @@
 // of WORD_EOF indicates that the end of the bitstream was reached (all 1s) or
 // some other error occurred.
 
-long get_word (WavpackStream *wps, int chan)
+long get_words (WavpackStream *wps, int nchans, int nsamples, long *buffer)
 {
-    ulong ones_count, low, mid, high;
-    int sign;
+    ulong tsamples = nsamples * nchans, ones_count, low, mid, high;
+    int next8, sign, chan;
+    long *bptr = buffer;
 
-    if (wps->w.zeros_acc) {
-	if (--wps->w.zeros_acc) {
-	    wps->w.slow_level [chan] -= (wps->w.slow_level [chan] + SLO) >> SLS;
-	    return 0;
-	}
-    }
-    else if (!wps->w.holding_zero && !wps->w.holding_one && !(wps->w.median [0] [0] & ~1) && !(wps->w.median [0] [1] & ~1)) {
-	ulong mask;
-	int cbits;
+    while (tsamples--) {
 
-	for (cbits = 0; cbits < 33 && getbit (&wps->wvbits); ++cbits);
+	chan = (nchans == 1) ? 0 : (~tsamples & 1);
 
-	if (cbits == 33)
-	    return WORD_EOF;
-
-	if (cbits < 2)
-	    wps->w.zeros_acc = cbits;
-	else {
-	    for (mask = 1, wps->w.zeros_acc = 0; --cbits; mask <<= 1)
-		if (getbit (&wps->wvbits))
-		    wps->w.zeros_acc |= mask;
-
-	    wps->w.zeros_acc |= mask;
-	}
-
-	if (wps->w.zeros_acc) {
-	    wps->w.slow_level [chan] -= (wps->w.slow_level [chan] + SLO) >> SLS;
-	    CLEAR (wps->w.median);
-	    return 0;
-	}
-    }
-
-    if (wps->w.holding_zero)
-	ones_count = wps->w.holding_zero = 0;
-    else {
-#ifdef LIMIT_ONES
-	for (ones_count = 0; ones_count < (LIMIT_ONES + 1) && getbit (&wps->wvbits); ++ones_count);
-
-	if (ones_count == (LIMIT_ONES + 1))
-	    return WORD_EOF;
-
-	if (ones_count == LIMIT_ONES) {
+	if (!(wps->w.median [0] [0] & ~1) && !wps->w.holding_zero && !wps->w.holding_one && !(wps->w.median [0] [1] & ~1)) {
 	    ulong mask;
 	    int cbits;
 
-	    for (cbits = 0; cbits < 33 && getbit (&wps->wvbits); ++cbits);
-
-	    if (cbits == 33)
-		return WORD_EOF;
-
-	    if (cbits < 2)
-		ones_count = cbits;
+	    if (wps->w.zeros_acc) {
+		if (--wps->w.zeros_acc) {
+		    wps->w.slow_level [chan] -= (wps->w.slow_level [chan] + SLO) >> SLS;
+		    *bptr++ = 0;
+		    continue;
+		}
+	    }
 	    else {
-		for (mask = 1, ones_count = 0; --cbits; mask <<= 1)
-		    if (getbit (&wps->wvbits))
+		for (cbits = 0; cbits < 33 && getbit (&wps->wvbits); ++cbits);
+
+		if (cbits == 33)
+		    break;
+
+		if (cbits < 2)
+		    wps->w.zeros_acc = cbits;
+		else {
+		    for (mask = 1, wps->w.zeros_acc = 0; --cbits; mask <<= 1)
+			if (getbit (&wps->wvbits))
+			    wps->w.zeros_acc |= mask;
+
+		    wps->w.zeros_acc |= mask;
+		}
+
+		if (wps->w.zeros_acc) {
+		    wps->w.slow_level [chan] -= (wps->w.slow_level [chan] + SLO) >> SLS;
+		    CLEAR (wps->w.median);
+		    *bptr++ = 0;
+		    continue;
+		}
+	    }
+	}
+
+	if (wps->w.holding_zero)
+	    ones_count = wps->w.holding_zero = 0;
+	else {
+	    if (wps->wvbits.bc < 8) {
+		if (++(wps->wvbits.ptr) == wps->wvbits.end)
+		    wps->wvbits.wrap (&wps->wvbits);
+
+		next8 = (wps->wvbits.sr |= *(wps->wvbits.ptr) << wps->wvbits.bc) & 0xff;
+		wps->wvbits.bc += 8;
+	    }
+	    else
+		next8 = wps->wvbits.sr & 0xff;
+
+	    if (next8 == 0xff) {
+		wps->wvbits.bc -= 8;
+		wps->wvbits.sr >>= 8;
+
+		for (ones_count = 8; ones_count < (LIMIT_ONES + 1) && getbit (&wps->wvbits); ++ones_count);
+
+		if (ones_count == (LIMIT_ONES + 1))
+		    break;
+
+		if (ones_count == LIMIT_ONES) {
+		    ulong mask;
+		    int cbits;
+
+		    for (cbits = 0; cbits < 33 && getbit (&wps->wvbits); ++cbits);
+
+		    if (cbits == 33)
+			break;
+
+		    if (cbits < 2)
+			ones_count = cbits;
+		    else {
+			for (mask = 1, ones_count = 0; --cbits; mask <<= 1)
+			    if (getbit (&wps->wvbits))
+				ones_count |= mask;
+
 			ones_count |= mask;
+		    }
 
-		ones_count |= mask;
-	    }
-
-	    ones_count += LIMIT_ONES;
-	}
-#else
-	for (ones_count = 0; getbit (&wps->wvbits); ++ones_count);
-#endif
-
-	if (wps->w.holding_one) {
-	    wps->w.holding_one = ones_count & 1;
-	    ones_count = (ones_count >> 1) + 1;
-	}
-	else {
-	    wps->w.holding_one = ones_count & 1;
-	    ones_count >>= 1;
-	}
-
-	wps->w.holding_zero = ~wps->w.holding_one & 1;
-    }
-
-    if ((wps->wphdr.flags & HYBRID_FLAG) && !chan)
-	update_error_limit (wps);
-
-    if (ones_count == 0) {
-	low = 0;
-	high = GET_MED (0) - 1;
-	DEC_MED0 ();
-    }
-    else {
-	low = GET_MED (0);
-	INC_MED0 ();
-
-	if (ones_count == 1) {
-	    high = low + GET_MED (1) - 1;
-	    DEC_MED1 ();
-	}
-	else {
-	    low += GET_MED (1);
-	    INC_MED1 ();
-
-	    if (ones_count == 2) {
-		high = low + GET_MED (2) - 1;
-		DEC_MED2 ();
+		    ones_count += LIMIT_ONES;
+		}
 	    }
 	    else {
-		low += (ones_count - 2) * GET_MED (2);
-		high = low + GET_MED (2) - 1;
-		INC_MED2 ();
+		wps->wvbits.bc -= (ones_count = ones_count_table [next8]) + 1;
+		wps->wvbits.sr >>= ones_count + 1;
+	    }
+
+	    if (wps->w.holding_one) {
+		wps->w.holding_one = ones_count & 1;
+		ones_count = (ones_count >> 1) + 1;
+	    }
+	    else {
+		wps->w.holding_one = ones_count & 1;
+		ones_count >>= 1;
+	    }
+
+	    wps->w.holding_zero = ~wps->w.holding_one & 1;
+	}
+
+	if ((wps->wphdr.flags & HYBRID_FLAG) && !chan)
+	    update_error_limit (wps);
+
+	if (ones_count == 0) {
+	    low = 0;
+	    high = GET_MED (0) - 1;
+	    DEC_MED0 ();
+	}
+	else {
+	    low = GET_MED (0);
+	    INC_MED0 ();
+
+	    if (ones_count == 1) {
+		high = low + GET_MED (1) - 1;
+		DEC_MED1 ();
+	    }
+	    else {
+		low += GET_MED (1);
+		INC_MED1 ();
+
+		if (ones_count == 2) {
+		    high = low + GET_MED (2) - 1;
+		    DEC_MED2 ();
+		}
+		else {
+		    low += (ones_count - 2) * GET_MED (2);
+		    high = low + GET_MED (2) - 1;
+		    INC_MED2 ();
+		}
 	    }
 	}
+
+	mid = (high + low + 1) >> 1;
+
+	if (!wps->w.error_limit [chan])
+	    mid = read_code (&wps->wvbits, high - low) + low;
+	else while (high - low > wps->w.error_limit [chan]) {
+	    if (getbit (&wps->wvbits))
+		mid = (high + (low = mid) + 1) >> 1;
+	    else
+		mid = ((high = mid - 1) + low + 1) >> 1;
+	}
+
+	sign = getbit (&wps->wvbits);
+
+	if (wps->wphdr.flags & HYBRID_BITRATE) {
+	    wps->w.slow_level [chan] -= (wps->w.slow_level [chan] + SLO) >> SLS;
+	    wps->w.slow_level [chan] += mylog2 (mid);
+	}
+
+	*bptr++ = sign ? ~mid : mid;
     }
 
-    mid = (high + low + 1) >> 1;
-
-    if (!wps->w.error_limit [chan])
-	mid = read_code (&wps->wvbits, high - low) + low;
-    else while (high - low > wps->w.error_limit [chan]) {
-	if (getbit (&wps->wvbits))
-	    mid = (high + (low = mid) + 1) >> 1;
-	else
-	    mid = ((high = mid - 1) + low + 1) >> 1;
-    }
-
-    sign = getbit (&wps->wvbits);
-
-    if (wps->wphdr.flags & HYBRID_BITRATE) {
-	wps->w.slow_level [chan] -= (wps->w.slow_level [chan] + SLO) >> SLS;
-	wps->w.slow_level [chan] += log2 (mid);
-    }
-
-    return sign ? ~mid : mid;
+    return nchans == 1 ? (bptr - buffer) : ((bptr - buffer) / 2);
 }
 
 // Read a single unsigned value from the specified bitstream with a value
@@ -448,7 +486,7 @@
 // This function returns the log2 for the specified 32-bit unsigned value.
 // The maximum value allowed is about 0xff800000 and returns 8447.
 
-static int log2 (unsigned long avalue)
+static int mylog2 (unsigned long avalue)
 {
     int dbits;
 
diff --git a/apps/codecs/libwavpack/wputils.c b/apps/codecs/libwavpack/wputils.c
index 1b8fae0..9227b66 100644
--- a/apps/codecs/libwavpack/wputils.c
+++ b/apps/codecs/libwavpack/wputils.c
@@ -19,6 +19,8 @@
 
 #include <string.h>
 
+static void strcpy_loc (char *dst, char *src) { while (*src) *dst++ = *src++; *dst = 0; }  // local string copy: copies src up to and including its terminating 0 into dst; no bounds check on dst
+
 ///////////////////////////// local table storage ////////////////////////////
 
 const ulong sample_rates [] = { 6000, 8000, 9600, 11025, 12000, 16000, 22050,
@@ -49,7 +51,6 @@
 {
     WavpackStream *wps = &wpc.stream;
     ulong bcount;
-    (void)error;
 
     CLEAR (wpc);
     wpc.infile = infile;
@@ -64,12 +65,12 @@
 	bcount = read_next_header (wpc.infile, &wps->wphdr);
 
 	if (bcount == (ulong) -1) {
-	    /*strcpy (error, "not compatible with this version of WavPack file!");*/
+	    strcpy_loc (error, "invalid WavPack file!");
 	    return NULL;
 	}
 
 	if ((wps->wphdr.flags & UNKNOWN_FLAGS) || wps->wphdr.version < 0x402 || wps->wphdr.version > 0x40f) {
-	    /*strcpy (error, "not compatible with this version of WavPack file!");*/
+	    strcpy_loc (error, "invalid WavPack file!");
 	    return NULL;
 	}
 
@@ -77,8 +78,8 @@
 	    wpc.total_samples = wps->wphdr.total_samples;
 
 	if (!unpack_init (&wpc)) {
-	    /*strcpy (error, wpc.error_message [0] ? wpc.error_message :
-		"not compatible with this version of WavPack file!");*/
+	    strcpy_loc (error, wpc.error_message [0] ? wpc.error_message :
+		"invalid WavPack file!");
 
 	    return NULL;
 	}
@@ -170,7 +171,7 @@
 		    break;
 
 		if (wps->wphdr.version < 0x402 || wps->wphdr.version > 0x40f) {
-		    /*strcpy (wpc->error_message, "not compatible with this version of WavPack file!");*/
+		    strcpy_loc (wpc->error_message, "invalid WavPack file!");
 		    break;
 		}
 
diff --git a/docs/CREDITS b/docs/CREDITS
index 99c020b..0dcae97 100644
--- a/docs/CREDITS
+++ b/docs/CREDITS
@@ -112,3 +112,4 @@
 Richard Ottó O'Brien
 Luca Burelli
 Alessio Lenzi
+David Bryant