Sync to upstream libopus
Sync to commit bb4b6885a139644cf3ac14e7deda9f633ec2d93c
This brings in a number of optimizations to decode speed
and memory usage. Allocations are switched from using
the pseudostack to using the real stack. Hacks to reduce
stack usage are enabled.
This should fix crashes on the Sansa Clip, although some
files will still not play because allocations in the
codec buffer fail.
Speeds up decoding of the following test files:
                 H300 (cf)  C200 (arm7tdmi)  iPod Classic (arm9e)
16 kbps (silk)   14.28 MHz  4.00 MHz         2.61 MHz
64 kbps (celt)    4.09 MHz  8.08 MHz         6.24 MHz
128 kbps (celt)   1.93 MHz  8.83 MHz         6.53 MHz
Change-Id: I851733a8a5824b61feb363a173091bc7e6629b58
diff --git a/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h b/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h
index cc52f37..8ddb9ad 100644
--- a/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h
+++ b/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h
@@ -65,10 +65,6 @@
do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
(m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
-# define C_MUL4(m,a,b) \
- do{ (m).r = SHR32(SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)),2); \
- (m).i = SHR32(ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)),2); }while(0)
-
# define C_MULBYSCALAR( c, s ) \
do{ (c).r = S_MUL( (c).r , s ) ;\
(c).i = S_MUL( (c).i , s ) ; }while(0)
diff --git a/lib/rbcodec/codecs/libopus/celt/arch.h b/lib/rbcodec/codecs/libopus/celt/arch.h
index b2d26c4..035b92f 100644
--- a/lib/rbcodec/codecs/libopus/celt/arch.h
+++ b/lib/rbcodec/codecs/libopus/celt/arch.h
@@ -69,11 +69,9 @@
#define IMUL32(a,b) ((a)*(b))
-#define ABS(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute integer value. */
-#define ABS16(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 16-bit value. */
+#define ABS(x) ((x) < 0 ? (-(x)) : (x))
#define MIN16(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 16-bit value. */
#define MAX16(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 16-bit value. */
-#define ABS32(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 32-bit value. */
#define MIN32(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 32-bit value. */
#define MAX32(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 32-bit value. */
#define IMIN(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum int value. */
@@ -108,6 +106,13 @@
#define SCALEIN(a) (a)
#define SCALEOUT(a) (a)
+#define ABS16(x) ((x) < 0 ? (-(x)) : (x))
+#define ABS32(x) ((x) < 0 ? (-(x)) : (x))
+
+static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
+ return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x;
+}
+
#ifdef FIXED_DEBUG
#include "fixed_debug.h"
#else
@@ -139,6 +144,22 @@
typedef float celt_norm;
typedef float celt_ener;
+#ifdef FLOAT_APPROX
+/* This code should reliably detect NaN/inf even when -ffast-math is used.
+ Assumes IEEE 754 format. */
+static OPUS_INLINE int celt_isnan(float x)
+{
+ union {float f; opus_uint32 i;} in;
+ in.f = x;
+ return ((in.i>>23)&0xFF)==0xFF && (in.i&0x007FFFFF)!=0;
+}
+#else
+#ifdef __FAST_MATH__
+#error Cannot build libopus with -ffast-math unless FLOAT_APPROX is defined. This could result in crashes on extreme (e.g. NaN) input
+#endif
+#define celt_isnan(x) ((x)!=(x))
+#endif
+
#define Q15ONE 1.0f
#define NORM_SCALING 1.f
@@ -148,6 +169,10 @@
#define VERY_LARGE16 1e15f
#define Q15_ONE ((opus_val16)1.f)
+/* This appears to be the same speed as C99's fabsf() but it's more portable. */
+#define ABS16(x) ((float)fabs(x))
+#define ABS32(x) ((float)fabs(x))
+
#define QCONST16(x,bits) (x)
#define QCONST32(x,bits) (x)
@@ -186,6 +211,7 @@
#define MULT32_32_Q31(a,b) ((a)*(b))
#define MAC16_32_Q15(c,a,b) ((c)+(a)*(b))
+#define MAC16_32_Q16(c,a,b) ((c)+(a)*(b))
#define MULT16_16_Q11_32(a,b) ((a)*(b))
#define MULT16_16_Q11(a,b) ((a)*(b))
@@ -203,6 +229,8 @@
#define SCALEIN(a) ((a)*CELT_SIG_SCALE)
#define SCALEOUT(a) ((a)*(1/CELT_SIG_SCALE))
+#define SIG2WORD16(x) (x)
+
#endif /* !FIXED_POINT */
#ifndef GLOBAL_STACK_SIZE
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h
index b690bc8..efb3b18 100644
--- a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h
+++ b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h
@@ -68,6 +68,10 @@
#undef MAC16_32_Q15
#define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b))
+/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add.
+ Result fits in 32 bits. */
+#undef MAC16_32_Q16
+#define MAC16_32_Q16(c, a, b) ADD32(c, MULT16_32_Q16(a, b))
/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
#undef MULT32_32_Q31
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h
index 1194a7d..36a6321 100644
--- a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h
+++ b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h
@@ -82,6 +82,23 @@
}
#define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b))
+/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add.
+ Result fits in 32 bits. */
+#undef MAC16_32_Q16
+static OPUS_INLINE opus_val32 MAC16_32_Q16_armv5e(opus_val32 c, opus_val16 a,
+ opus_val32 b)
+{
+ int res;
+ __asm__(
+ "#MAC16_32_Q16\n\t"
+ "smlawb %0, %1, %2, %3;\n"
+ : "=r"(res)
+ : "r"(b), "r"(a), "r"(c)
+ );
+ return res;
+}
+#define MAC16_32_Q16(c, a, b) (MAC16_32_Q16_armv5e(c, a, b))
+
/** 16x16 multiply-add where the result fits in 32 bits */
#undef MAC16_16
static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a,
@@ -113,4 +130,22 @@
}
#define MULT16_16(a, b) (MULT16_16_armv5e(a, b))
+#ifdef OPUS_ARM_INLINE_MEDIA
+
+#undef SIG2WORD16
+static OPUS_INLINE opus_val16 SIG2WORD16_armv6(opus_val32 x)
+{
+ celt_sig res;
+ __asm__(
+ "#SIG2WORD16\n\t"
+ "ssat %0, #16, %1, ASR #12\n\t"
+ : "=r"(res)
+ : "r"(x+2048)
+ );
+ return EXTRACT16(res);
+}
+#define SIG2WORD16(x) (SIG2WORD16_armv6(x))
+
+#endif /* OPUS_ARM_INLINE_MEDIA */
+
#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/bands.c b/lib/rbcodec/codecs/libopus/celt/bands.c
index 1ad786d..caa7016 100644
--- a/lib/rbcodec/codecs/libopus/celt/bands.c
+++ b/lib/rbcodec/codecs/libopus/celt/bands.c
@@ -93,11 +93,11 @@
#if 0
#ifdef FIXED_POINT
/* Compute the amplitude (sqrt energy) in each of the bands */
-void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M)
+void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM)
{
int i, c, N;
const opus_int16 *eBands = m->eBands;
- N = M*m->shortMdctSize;
+ N = m->shortMdctSize<<LM;
c=0; do {
for (i=0;i<end;i++)
{
@@ -105,18 +105,23 @@
opus_val32 maxval=0;
opus_val32 sum = 0;
- j=M*eBands[i]; do {
- maxval = MAX32(maxval, X[j+c*N]);
- maxval = MAX32(maxval, -X[j+c*N]);
- } while (++j<M*eBands[i+1]);
-
+ maxval = celt_maxabs32(&X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM);
if (maxval > 0)
{
- int shift = celt_ilog2(maxval)-10;
- j=M*eBands[i]; do {
- sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)),
- EXTRACT16(VSHR32(X[j+c*N],shift)));
- } while (++j<M*eBands[i+1]);
+ int shift = celt_ilog2(maxval) - 14 + (((m->logN[i]>>BITRES)+LM+1)>>1);
+ j=eBands[i]<<LM;
+ if (shift>0)
+ {
+ do {
+ sum = MAC16_16(sum, EXTRACT16(SHR32(X[j+c*N],shift)),
+ EXTRACT16(SHR32(X[j+c*N],shift)));
+ } while (++j<eBands[i+1]<<LM);
+ } else {
+ do {
+ sum = MAC16_16(sum, EXTRACT16(SHL32(X[j+c*N],-shift)),
+ EXTRACT16(SHL32(X[j+c*N],-shift)));
+ } while (++j<eBands[i+1]<<LM);
+ }
/* We're adding one here to ensure the normalized band isn't larger than unity norm */
bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift);
} else {
@@ -151,18 +156,16 @@
#else /* FIXED_POINT */
/* Compute the amplitude (sqrt energy) in each of the bands */
-void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M)
+void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM)
{
int i, c, N;
const opus_int16 *eBands = m->eBands;
- N = M*m->shortMdctSize;
+ N = m->shortMdctSize<<LM;
c=0; do {
for (i=0;i<end;i++)
{
- int j;
- opus_val32 sum = 1e-27f;
- for (j=M*eBands[i];j<M*eBands[i+1];j++)
- sum += X[j+c*N]*X[j+c*N];
+ opus_val32 sum;
+ sum = 1e-27f + celt_inner_prod(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM);
bandE[i+c*m->nbEBands] = celt_sqrt(sum);
/*printf ("%f ", bandE[i+c*m->nbEBands]);*/
}
@@ -192,74 +195,80 @@
/* De-normalise the energy to produce the synthesis from the unit-energy bands */
void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
- celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, int end, int C, int M)
+ celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start,
+ int end, int M, int downsample, int silence)
{
- int i, c, N;
+ int i, N;
+ int bound;
+ celt_sig * OPUS_RESTRICT f;
+ const celt_norm * OPUS_RESTRICT x;
const opus_int16 *eBands = m->eBands;
N = M*m->shortMdctSize;
- celt_assert2(C<=2, "denormalise_bands() not implemented for >2 channels");
- c=0; do {
- celt_sig * OPUS_RESTRICT f;
- const celt_norm * OPUS_RESTRICT x;
- f = freq+c*N;
- x = X+c*N+M*eBands[start];
- for (i=0;i<M*eBands[start];i++)
- *f++ = 0;
- for (i=start;i<end;i++)
- {
- int j, band_end;
- opus_val16 g;
- opus_val16 lg;
+ bound = M*eBands[end];
+ if (downsample!=1)
+ bound = IMIN(bound, N/downsample);
+ if (silence)
+ {
+ bound = 0;
+ start = end = 0;
+ }
+ f = freq;
+ x = X+M*eBands[start];
+ for (i=0;i<M*eBands[start];i++)
+ *f++ = 0;
+ for (i=start;i<end;i++)
+ {
+ int j, band_end;
+ opus_val16 g;
+ opus_val16 lg;
#ifdef FIXED_POINT
- int shift;
+ int shift;
#endif
- j=M*eBands[i];
- band_end = M*eBands[i+1];
- lg = ADD16(bandLogE[i+c*m->nbEBands], SHL16((opus_val16)eMeans[i],6));
+ j=M*eBands[i];
+ band_end = M*eBands[i+1];
+ lg = ADD16(bandLogE[i], SHL16((opus_val16)eMeans[i],6));
#ifndef FIXED_POINT
- g = celt_exp2(lg);
+ g = celt_exp2(lg);
#else
- /* Handle the integer part of the log energy */
- shift = 16-(lg>>DB_SHIFT);
- if (shift>31)
- {
- shift=0;
- g=0;
- } else {
- /* Handle the fractional part. */
- g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
- }
- /* Handle extreme gains with negative shift. */
- if (shift<0)
- {
- /* For shift < -2 we'd be likely to overflow, so we're capping
+ /* Handle the integer part of the log energy */
+ shift = 16-(lg>>DB_SHIFT);
+ if (shift>31)
+ {
+ shift=0;
+ g=0;
+ } else {
+ /* Handle the fractional part. */
+ g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
+ }
+ /* Handle extreme gains with negative shift. */
+ if (shift<0)
+ {
+ /* For shift < -2 we'd be likely to overflow, so we're capping
the gain here. This shouldn't happen unless the bitstream is
already corrupted. */
- if (shift < -2)
- {
- g = 32767;
- shift = -2;
- }
- do {
- *f++ = SHL32(MULT16_16(*x++, g), -shift);
- } while (++j<band_end);
- } else
+ if (shift < -2)
+ {
+ g = 32767;
+ shift = -2;
+ }
+ do {
+ *f++ = SHL32(MULT16_16(*x++, g), -shift);
+ } while (++j<band_end);
+ } else
#endif
/* Be careful of the fixed-point "else" just above when changing this code */
do {
*f++ = SHR32(MULT16_16(*x++, g), shift);
} while (++j<band_end);
- }
- celt_assert(start <= end);
- for (i=M*eBands[end];i<N;i++)
- *f++ = 0;
- } while (++c<C);
+ }
+ celt_assert(start <= end);
+ OPUS_CLEAR(&freq[bound], N-bound);
}
/* This prevents energy collapse for transients with multiple short MDCTs */
void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
- int start, int end, opus_val16 *logE, opus_val16 *prev1logE,
- opus_val16 *prev2logE, int *pulses, opus_uint32 seed)
+ int start, int end, const opus_val16 *logE, const opus_val16 *prev1logE,
+ const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed)
{
int c, i, j, k;
for (i=start;i<end;i++)
@@ -274,7 +283,8 @@
N0 = m->eBands[i+1]-m->eBands[i];
/* depth in 1/8 bits */
- depth = (1+pulses[i])/((m->eBands[i+1]-m->eBands[i])<<LM);
+ celt_assert(pulses[i]>=0);
+ depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM;
#ifdef FIXED_POINT
thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1);
@@ -352,7 +362,7 @@
}
}
-static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, const celt_ener *bandE, int bandID, int N)
+static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N)
{
int i = bandID;
int j;
@@ -372,25 +382,25 @@
celt_norm r, l;
l = X[j];
r = Y[j];
- X[j] = MULT16_16_Q14(a1,l) + MULT16_16_Q14(a2,r);
+ X[j] = EXTRACT16(SHR32(MAC16_16(MULT16_16(a1, l), a2, r), 14));
/* Side is not encoded, no need to calculate */
}
}
-static void stereo_split(celt_norm *X, celt_norm *Y, int N)
+static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, int N)
{
int j;
for (j=0;j<N;j++)
{
- celt_norm r, l;
- l = MULT16_16_Q15(QCONST16(.70710678f,15), X[j]);
- r = MULT16_16_Q15(QCONST16(.70710678f,15), Y[j]);
- X[j] = l+r;
- Y[j] = r-l;
+ opus_val32 r, l;
+ l = MULT16_16(QCONST16(.70710678f, 15), X[j]);
+ r = MULT16_16(QCONST16(.70710678f, 15), Y[j]);
+ X[j] = EXTRACT16(SHR32(ADD32(l, r), 15));
+ Y[j] = EXTRACT16(SHR32(SUB32(r, l), 15));
}
}
-static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N)
+static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, opus_val16 mid, int N)
{
int j;
opus_val32 xp=0, side=0;
@@ -411,8 +421,7 @@
Er = MULT16_16(mid2, mid2) + side + 2*xp;
if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28))
{
- for (j=0;j<N;j++)
- Y[j] = X[j];
+ OPUS_COPY(Y, X, N);
return;
}
@@ -436,7 +445,7 @@
{
celt_norm r, l;
/* Apply mid scaling (side is already scaled) */
- l = MULT16_16_Q15(mid, X[j]);
+ l = MULT16_16_P15(mid, X[j]);
r = Y[j];
X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1));
Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1));
@@ -445,7 +454,7 @@
#if 0
/* Decide whether we should spread the pulses in the current frame */
-int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
+int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
int last_decision, int *hf_average, int *tapset_decision, int update_hf,
int end, int C, int M)
{
@@ -466,7 +475,7 @@
{
int j, N, tmp=0;
int tcount[3] = {0,0,0};
- celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0;
+ const celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0;
N = M*(eBands[i+1]-eBands[i]);
if (N<=8)
continue;
@@ -486,7 +495,7 @@
/* Only include four last bands (8 kHz and up) */
if (i>m->nbEBands-4)
- hf_sum += 32*(tcount[1]+tcount[0])/N;
+ hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N);
tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N);
sum += tmp*256;
nbBands++;
@@ -496,7 +505,7 @@
if (update_hf)
{
if (hf_sum)
- hf_sum /= C*(4-m->nbEBands+end);
+ hf_sum = celt_udiv(hf_sum, C*(4-m->nbEBands+end));
*hf_average = (*hf_average+hf_sum)>>1;
hf_sum = *hf_average;
if (*tapset_decision==2)
@@ -512,7 +521,8 @@
}
/*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/
celt_assert(nbBands>0); /* end has to be non-zero */
- sum /= nbBands;
+ celt_assert(sum>=0);
+ sum = celt_udiv(sum, nbBands);
/* Recursive averaging */
sum = (sum+*average)>>1;
*average = sum;
@@ -571,8 +581,7 @@
for (j=0;j<N0;j++)
tmp[i*N0+j] = X[j*stride+i];
}
- for (j=0;j<N;j++)
- X[j] = tmp[j];
+ OPUS_COPY(X, tmp, N);
RESTORE_STACK;
}
@@ -595,8 +604,7 @@
for (j=0;j<N0;j++)
tmp[j*stride+i] = X[i*N0+j];
}
- for (j=0;j<N;j++)
- X[j] = tmp[j];
+ OPUS_COPY(X, tmp, N);
RESTORE_STACK;
}
@@ -607,11 +615,11 @@
for (i=0;i<stride;i++)
for (j=0;j<N0;j++)
{
- celt_norm tmp1, tmp2;
- tmp1 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*2*j+i]);
- tmp2 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]);
- X[stride*2*j+i] = tmp1 + tmp2;
- X[stride*(2*j+1)+i] = tmp1 - tmp2;
+ opus_val32 tmp1, tmp2;
+ tmp1 = MULT16_16(QCONST16(.70710678f,15), X[stride*2*j+i]);
+ tmp2 = MULT16_16(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]);
+ X[stride*2*j+i] = EXTRACT16(PSHR32(ADD32(tmp1, tmp2), 15));
+ X[stride*(2*j+1)+i] = EXTRACT16(PSHR32(SUB32(tmp1, tmp2), 15));
}
}
@@ -626,7 +634,8 @@
/* The upper limit ensures that in a stereo split with itheta==16384, we'll
always have enough bits left over to code at least one pulse in the
side; otherwise it would collapse, since it doesn't get folded. */
- qb = IMIN(b-pulse_cap-(4<<BITRES), (b+N2*offset)/N2);
+ qb = celt_sudiv(b+N2*offset, N2);
+ qb = IMIN(b-pulse_cap-(4<<BITRES), qb);
qb = IMIN(8<<BITRES, qb);
@@ -773,7 +782,8 @@
ec_dec_update(ec, fl, fl+fs, ft);
}
}
- itheta = (opus_int32)itheta*16384/qn;
+ celt_assert(itheta>=0);
+ itheta = celt_udiv((opus_int32)itheta*16384, qn);
if (encode && stereo)
{
if (itheta==0)
@@ -1025,8 +1035,7 @@
fill &= cm_mask;
if (!fill)
{
- for (j=0;j<N;j++)
- X[j] = 0;
+ OPUS_CLEAR(X, N);
} else {
if (lowband == NULL)
{
@@ -1088,7 +1097,7 @@
longBlocks = B0==1;
- N_B /= B;
+ N_B = celt_udiv(N_B, B);
/* Special case for one sample */
if (N==1)
@@ -1102,9 +1111,7 @@
if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1))
{
- int j;
- for (j=0;j<N;j++)
- lowband_scratch[j] = lowband[j];
+ OPUS_COPY(lowband_scratch, lowband, N);
lowband = lowband_scratch;
}
@@ -1432,7 +1439,7 @@
ctx.remaining_bits = remaining_bits;
if (i <= codedBands-1)
{
- curr_balance = balance / IMIN(3, codedBands-i);
+ curr_balance = celt_sudiv(balance, IMIN(3, codedBands-i));
b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance)));
} else {
b = 0;
diff --git a/lib/rbcodec/codecs/libopus/celt/bands.h b/lib/rbcodec/codecs/libopus/celt/bands.h
index 96ba52a..69901b1 100644
--- a/lib/rbcodec/codecs/libopus/celt/bands.h
+++ b/lib/rbcodec/codecs/libopus/celt/bands.h
@@ -41,7 +41,7 @@
* @param X Spectrum
* @param bandE Square root of the energy for each band (returned)
*/
-void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M);
+void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM);
/*void compute_noise_energies(const CELTMode *m, const celt_sig *X, const opus_val16 *tonality, celt_ener *bandE);*/
@@ -59,14 +59,15 @@
* @param bandE Square root of the energy for each band
*/
void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
- celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandE, int start, int end, int C, int M);
+ celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandE, int start,
+ int end, int M, int downsample, int silence);
#define SPREAD_NONE (0)
#define SPREAD_LIGHT (1)
#define SPREAD_NORMAL (2)
#define SPREAD_AGGRESSIVE (3)
-int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
+int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
int last_decision, int *hf_average, int *tapset_decision, int update_hf,
int end, int C, int M);
@@ -104,8 +105,8 @@
opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed);
void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
- int start, int end, opus_val16 *logE, opus_val16 *prev1logE,
- opus_val16 *prev2logE, int *pulses, opus_uint32 seed);
+ int start, int end, const opus_val16 *logE, const opus_val16 *prev1logE,
+ const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed);
opus_uint32 celt_lcg_rand(opus_uint32 seed);
diff --git a/lib/rbcodec/codecs/libopus/celt/celt.c b/lib/rbcodec/codecs/libopus/celt/celt.c
index 3e0ce6e..c0a1e0d 100644
--- a/lib/rbcodec/codecs/libopus/celt/celt.c
+++ b/lib/rbcodec/codecs/libopus/celt/celt.c
@@ -54,6 +54,10 @@
#define PACKAGE_VERSION "unknown"
#endif
+#if defined(MIPSr1_ASM)
+#include "mips/celt_mipsr1.h"
+#endif
+
int resampling_factor(opus_int32 rate)
{
@@ -86,6 +90,63 @@
}
#ifndef OVERRIDE_COMB_FILTER_CONST
+/* This version should be faster on ARM */
+#ifdef OPUS_ARM_ASM
+static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
+ opus_val16 g10, opus_val16 g11, opus_val16 g12)
+{
+ opus_val32 x0, x1, x2, x3, x4;
+ int i;
+ x4 = SHL32(x[-T-2], 1);
+ x3 = SHL32(x[-T-1], 1);
+ x2 = SHL32(x[-T], 1);
+ x1 = SHL32(x[-T+1], 1);
+ for (i=0;i<N-4;i+=5)
+ {
+ opus_val32 t;
+ x0=SHL32(x[i-T+2],1);
+ t = MAC16_32_Q16(x[i], g10, x2);
+ t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
+ t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
+ y[i] = t;
+ x4=SHL32(x[i-T+3],1);
+ t = MAC16_32_Q16(x[i+1], g10, x1);
+ t = MAC16_32_Q16(t, g11, ADD32(x0,x2));
+ t = MAC16_32_Q16(t, g12, ADD32(x4,x3));
+ y[i+1] = t;
+ x3=SHL32(x[i-T+4],1);
+ t = MAC16_32_Q16(x[i+2], g10, x0);
+ t = MAC16_32_Q16(t, g11, ADD32(x4,x1));
+ t = MAC16_32_Q16(t, g12, ADD32(x3,x2));
+ y[i+2] = t;
+ x2=SHL32(x[i-T+5],1);
+ t = MAC16_32_Q16(x[i+3], g10, x4);
+ t = MAC16_32_Q16(t, g11, ADD32(x3,x0));
+ t = MAC16_32_Q16(t, g12, ADD32(x2,x1));
+ y[i+3] = t;
+ x1=SHL32(x[i-T+6],1);
+ t = MAC16_32_Q16(x[i+4], g10, x3);
+ t = MAC16_32_Q16(t, g11, ADD32(x2,x4));
+ t = MAC16_32_Q16(t, g12, ADD32(x1,x0));
+ y[i+4] = t;
+ }
+#ifdef CUSTOM_MODES
+ for (;i<N;i++)
+ {
+ opus_val32 t;
+ x0=SHL32(x[i-T+2],1);
+ t = MAC16_32_Q16(x[i], g10, x2);
+ t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
+ t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
+ y[i] = t;
+ x4=x3;
+ x3=x2;
+ x2=x1;
+ x1=x0;
+ }
+#endif
+}
+#else
static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
opus_val16 g10, opus_val16 g11, opus_val16 g12)
{
@@ -110,7 +171,9 @@
}
#endif
+#endif
+#ifndef OVERRIDE_comb_filter
void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
const opus_val16 *window, int overlap)
@@ -131,16 +194,19 @@
OPUS_MOVE(y, x, N);
return;
}
- g00 = MULT16_16_Q15(g0, gains[tapset0][0]);
- g01 = MULT16_16_Q15(g0, gains[tapset0][1]);
- g02 = MULT16_16_Q15(g0, gains[tapset0][2]);
- g10 = MULT16_16_Q15(g1, gains[tapset1][0]);
- g11 = MULT16_16_Q15(g1, gains[tapset1][1]);
- g12 = MULT16_16_Q15(g1, gains[tapset1][2]);
+ g00 = MULT16_16_P15(g0, gains[tapset0][0]);
+ g01 = MULT16_16_P15(g0, gains[tapset0][1]);
+ g02 = MULT16_16_P15(g0, gains[tapset0][2]);
+ g10 = MULT16_16_P15(g1, gains[tapset1][0]);
+ g11 = MULT16_16_P15(g1, gains[tapset1][1]);
+ g12 = MULT16_16_P15(g1, gains[tapset1][2]);
x1 = x[-T1+1];
x2 = x[-T1 ];
x3 = x[-T1-1];
x4 = x[-T1-2];
+ /* If the filter didn't change, we don't need the overlap */
+ if (g0==g1 && T0==T1 && tapset0==tapset1)
+ overlap=0;
for (i=0;i<overlap;i++)
{
opus_val16 f;
@@ -170,6 +236,7 @@
/* Compute the part with the constant filter. */
comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12);
}
+#endif /* OVERRIDE_comb_filter */
const signed char tf_select_table[4][8] = {
{0, -1, 0, -1, 0,-1, 0,-1},
diff --git a/lib/rbcodec/codecs/libopus/celt/celt.h b/lib/rbcodec/codecs/libopus/celt/celt.h
index 5deea1f..b196751 100644
--- a/lib/rbcodec/codecs/libopus/celt/celt.h
+++ b/lib/rbcodec/codecs/libopus/celt/celt.h
@@ -134,7 +134,8 @@
int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels);
-int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec);
+int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data,
+ int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum);
#define celt_encoder_ctl opus_custom_encoder_ctl
#define celt_decoder_ctl opus_custom_decoder_ctl
@@ -205,10 +206,10 @@
void init_caps(const CELTMode *m,int *cap,int LM,int C);
#ifdef RESYNTH
-void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch);
-
-void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X,
- celt_sig * OPUS_RESTRICT out_mem[], int C, int LM);
+void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem);
+void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
+ opus_val16 *oldBandE, int start, int effEnd, int C, int CC, int isTransient,
+ int LM, int downsample, int silence);
#endif
#ifdef __cplusplus
diff --git a/lib/rbcodec/codecs/libopus/celt/celt_decoder.c b/lib/rbcodec/codecs/libopus/celt/celt_decoder.c
index 77fa2d0..8af96b7 100644
--- a/lib/rbcodec/codecs/libopus/celt/celt_decoder.c
+++ b/lib/rbcodec/codecs/libopus/celt/celt_decoder.c
@@ -51,6 +51,9 @@
#include "celt_lpc.h"
#include "vq.h"
+#if defined(SMALL_FOOTPRINT) && defined(FIXED_POINT)
+#define NORM_ALIASING_HACK
+#endif
/**********************************************************************/
/* */
/* DECODER */
@@ -175,28 +178,24 @@
}
#endif /* CUSTOM_MODES */
-static OPUS_INLINE opus_val16 SIG2WORD16(celt_sig x)
-{
-#ifdef FIXED_POINT
- x = PSHR32(x, SIG_SHIFT);
- x = MAX32(x, -32768);
- x = MIN32(x, 32767);
- return EXTRACT16(x);
-#else
- return (opus_val16)x;
-#endif
-}
#ifndef RESYNTH
static
#endif
-void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch)
+void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef,
+ celt_sig *mem, int accum)
{
int c;
int Nd;
int apply_downsampling=0;
opus_val16 coef0;
-
+ VARDECL(celt_sig, scratch);
+ SAVE_STACK;
+#ifndef FIXED_POINT
+ (void)accum;
+ celt_assert(accum==0);
+#endif
+ ALLOC(scratch, N, celt_sig);
coef0 = coef[0];
Nd = N/downsample;
c=0; do {
@@ -234,11 +233,24 @@
apply_downsampling=1;
} else {
/* Shortcut for the standard (non-custom modes) case */
- for (j=0;j<N;j++)
+#ifdef FIXED_POINT
+ if (accum)
{
- celt_sig tmp = x[j] + m + VERY_SMALL;
- m = MULT16_32_Q15(coef0, tmp);
- y[j*C] = SCALEOUT(SIG2WORD16(tmp));
+ for (j=0;j<N;j++)
+ {
+ celt_sig tmp = x[j] + m + VERY_SMALL;
+ m = MULT16_32_Q15(coef0, tmp);
+ y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(tmp))));
+ }
+ } else
+#endif
+ {
+ for (j=0;j<N;j++)
+ {
+ celt_sig tmp = x[j] + m + VERY_SMALL;
+ m = MULT16_32_Q15(coef0, tmp);
+ y[j*C] = SCALEOUT(SIG2WORD16(tmp));
+ }
}
}
mem[c] = m;
@@ -246,41 +258,94 @@
if (apply_downsampling)
{
/* Perform down-sampling */
- for (j=0;j<Nd;j++)
- y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample]));
+#ifdef FIXED_POINT
+ if (accum)
+ {
+ for (j=0;j<Nd;j++)
+ y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(scratch[j*downsample]))));
+ } else
+#endif
+ {
+ for (j=0;j<Nd;j++)
+ y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample]));
+ }
}
} while (++c<C);
+ RESTORE_STACK;
}
-/** Compute the IMDCT and apply window for all sub-frames and
- all channels in a frame */
#ifndef RESYNTH
static
#endif
-void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X,
- celt_sig * OPUS_RESTRICT out_mem[], int C, int LM)
+void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
+ opus_val16 *oldBandE, int start, int effEnd, int C, int CC, int isTransient,
+ int LM, int downsample, int silence)
{
- int b, c;
+ int c, i;
+ int M;
+ int b;
int B;
- int N;
+ int N, NB;
int shift;
- const int overlap = OVERLAP(mode);
+ int nbEBands;
+ int overlap;
+ VARDECL(celt_sig, freq);
+ SAVE_STACK;
- if (shortBlocks)
+ overlap = mode->overlap;
+ nbEBands = mode->nbEBands;
+ N = mode->shortMdctSize<<LM;
+ ALLOC(freq, N, celt_sig); /**< Interleaved signal MDCTs */
+ M = 1<<LM;
+
+ if (isTransient)
{
- B = shortBlocks;
- N = mode->shortMdctSize;
+ B = M;
+ NB = mode->shortMdctSize;
shift = mode->maxLM;
} else {
B = 1;
- N = mode->shortMdctSize<<LM;
+ NB = mode->shortMdctSize<<LM;
shift = mode->maxLM-LM;
}
- c=0; do {
- /* IMDCT on the interleaved the sub-frames, overlap-add is performed by the IMDCT */
+
+ if (CC==2&&C==1)
+ {
+ /* Copying a mono streams to two channels */
+ celt_sig *freq2;
+ denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M,
+ downsample, silence);
+ /* Store a temporary copy in the output buffer because the IMDCT destroys its input. */
+ freq2 = out_syn[1]+overlap/2;
+ OPUS_COPY(freq2, freq, N);
for (b=0;b<B;b++)
- clt_mdct_backward(&mode->mdct, &X[b+c*N*B], out_mem[c]+N*b, mode->window, overlap, shift, B);
- } while (++c<C);
+ clt_mdct_backward(&mode->mdct, &freq2[b], out_syn[0]+NB*b, mode->window, overlap, shift, B);
+ for (b=0;b<B;b++)
+ clt_mdct_backward(&mode->mdct, &freq[b], out_syn[1]+NB*b, mode->window, overlap, shift, B);
+ } else if (CC==1&&C==2)
+ {
+ /* Downmixing a stereo stream to mono */
+ celt_sig *freq2;
+ freq2 = out_syn[0]+overlap/2;
+ denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M,
+ downsample, silence);
+ /* Use the output buffer as temp array before downmixing. */
+ denormalise_bands(mode, X+N, freq2, oldBandE+nbEBands, start, effEnd, M,
+ downsample, silence);
+ for (i=0;i<N;i++)
+ freq[i] = HALF32(ADD32(freq[i],freq2[i]));
+ for (b=0;b<B;b++)
+ clt_mdct_backward(&mode->mdct, &freq[b], out_syn[0]+NB*b, mode->window, overlap, shift, B);
+ } else {
+ /* Normal case (mono or stereo) */
+ c=0; do {
+ denormalise_bands(mode, X+c*N, freq, oldBandE+c*nbEBands, start, effEnd, M,
+ downsample, silence);
+ for (b=0;b<B;b++)
+ clt_mdct_backward(&mode->mdct, &freq[b], out_syn[c]+NB*b, mode->window, overlap, shift, B);
+ } while (++c<CC);
+ }
+ RESTORE_STACK;
}
static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec)
@@ -330,7 +395,23 @@
pitch of 480 Hz. */
#define PLC_PITCH_LAG_MIN (100)
-static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_RESTRICT pcm, int N, int LM)
+static int celt_plc_pitch_search(celt_sig *decode_mem[2], int C, int arch)
+{
+ int pitch_index;
+ VARDECL( opus_val16, lp_pitch_buf );
+ SAVE_STACK;
+ ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 );
+ pitch_downsample(decode_mem, lp_pitch_buf,
+ DECODE_BUFFER_SIZE, C, arch);
+ pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf,
+ DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX,
+ PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, arch);
+ pitch_index = PLC_PITCH_LAG_MAX-pitch_index;
+ RESTORE_STACK;
+ return pitch_index;
+}
+
+static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
{
int c;
int i;
@@ -343,11 +424,9 @@
int nbEBands;
int overlap;
int start;
- int downsample;
int loss_count;
int noise_based;
const opus_int16 *eBands;
- VARDECL(celt_sig, scratch);
SAVE_STACK;
mode = st->mode;
@@ -367,14 +446,15 @@
loss_count = st->loss_count;
start = st->start;
- downsample = st->downsample;
noise_based = loss_count >= 5 || start != 0;
- ALLOC(scratch, noise_based?N*C:N, celt_sig);
if (noise_based)
{
/* Noise-based PLC/CNG */
- celt_sig *freq;
+#ifdef NORM_ALIASING_HACK
+ celt_norm *X;
+#else
VARDECL(celt_norm, X);
+#endif
opus_uint32 seed;
opus_val16 *plcLogE;
int end;
@@ -383,10 +463,13 @@
end = st->end;
effEnd = IMAX(start, IMIN(end, mode->effEBands));
- /* Share the interleaved signal MDCT coefficient buffer with the
- deemphasis scratch buffer. */
- freq = scratch;
+#ifdef NORM_ALIASING_HACK
+ /* This is an ugly hack that breaks aliasing rules and would be easily broken,
+ but it saves almost 4kB of stack. */
+ X = (celt_norm*)(out_syn[C-1]+overlap/2);
+#else
ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */
+#endif
if (loss_count >= 5)
plcLogE = backgroundLogE;
@@ -421,20 +504,12 @@
}
st->rng = seed;
- denormalise_bands(mode, X, freq, plcLogE, start, effEnd, C, 1<<LM);
-
- c=0; do {
- int bound = eBands[effEnd]<<LM;
- if (downsample!=1)
- bound = IMIN(bound, N/downsample);
- for (i=bound;i<N;i++)
- freq[c*N+i] = 0;
- } while (++c<C);
c=0; do {
OPUS_MOVE(decode_mem[c], decode_mem[c]+N,
DECODE_BUFFER_SIZE-N+(overlap>>1));
} while (++c<C);
- compute_inv_mdcts(mode, 0, freq, out_syn, C, LM);
+
+ celt_synthesis(mode, X, out_syn, plcLogE, start, effEnd, C, C, 0, LM, st->downsample, 0);
} else {
/* Pitch-based PLC */
const opus_val16 *window;
@@ -445,15 +520,7 @@
if (loss_count == 0)
{
- VARDECL( opus_val16, lp_pitch_buf );
- ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 );
- pitch_downsample(decode_mem, lp_pitch_buf,
- DECODE_BUFFER_SIZE, C, st->arch);
- pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf,
- DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX,
- PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, st->arch);
- pitch_index = PLC_PITCH_LAG_MAX-pitch_index;
- st->last_pitch_index = pitch_index;
+ st->last_pitch_index = pitch_index = celt_plc_pitch_search(decode_mem, C, st->arch);
} else {
pitch_index = st->last_pitch_index;
fade = QCONST16(.8f,15);
@@ -644,25 +711,23 @@
} while (++c<C);
}
- deemphasis(out_syn, pcm, N, C, downsample,
- mode->preemph, st->preemph_memD, scratch);
-
st->loss_count = loss_count+1;
RESTORE_STACK;
}
-#define FREQ_X_BUF_SIZE (2*8*120) /* stereo * nbShortMdcts * shortMdctSize */
-static celt_sig s_freq[FREQ_X_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; /* 7680 byte */
-static celt_norm s_X[FREQ_X_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; /* 3840 byte */
-int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec)
+int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
+ int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum)
{
int c, i, N;
int spread_decision;
opus_int32 bits;
ec_dec _dec;
- VARDECL(celt_sig, freq);
+#ifdef NORM_ALIASING_HACK
+ celt_norm *X;
+#else
VARDECL(celt_norm, X);
+#endif
VARDECL(int, fine_quant);
VARDECL(int, pulses);
VARDECL(int, cap);
@@ -680,6 +745,8 @@
int intra_ener;
const int CC = st->channels;
int LM, M;
+ int start;
+ int end;
int effEnd;
int codedBands;
int alloc_trim;
@@ -706,11 +773,10 @@
nbEBands = mode->nbEBands;
overlap = mode->overlap;
eBands = mode->eBands;
+ start = st->start;
+ end = st->end;
frame_size *= st->downsample;
- c=0; do {
- decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
- } while (++c<CC);
lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC);
oldBandE = lpc+CC*LPC_ORDER;
oldLogE = oldBandE + 2*nbEBands;
@@ -728,7 +794,7 @@
if (data0<0)
return OPUS_INVALID_PACKET;
}
- st->end = IMAX(1, mode->effEBands-2*(data0>>5));
+ st->end = end = IMAX(1, mode->effEBands-2*(data0>>5));
LM = (data0>>3)&0x3;
C = 1 + ((data0>>2)&0x1);
data++;
@@ -755,14 +821,19 @@
return OPUS_BAD_ARG;
N = M*mode->shortMdctSize;
+ c=0; do {
+ decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
+ out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
+ } while (++c<CC);
- effEnd = st->end;
+ effEnd = end;
if (effEnd > mode->effEBands)
effEnd = mode->effEBands;
if (data == NULL || len<=1)
{
- celt_decode_lost(st, pcm, N, LM);
+ celt_decode_lost(st, N, LM);
+ deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum);
RESTORE_STACK;
return frame_size/st->downsample;
}
@@ -798,7 +869,7 @@
postfilter_gain = 0;
postfilter_pitch = 0;
postfilter_tapset = 0;
- if (st->start==0 && tell+16 <= total_bits)
+ if (start==0 && tell+16 <= total_bits)
{
if(ec_dec_bit_logp(dec, 1))
{
@@ -829,11 +900,11 @@
/* Decode the global flags (first symbols in the stream) */
intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0;
/* Get band energies */
- unquant_coarse_energy(mode, st->start, st->end, oldBandE,
+ unquant_coarse_energy(mode, start, end, oldBandE,
intra_ener, dec, C, LM);
ALLOC(tf_res, nbEBands, int);
- tf_decode(st->start, st->end, isTransient, tf_res, LM, dec);
+ tf_decode(start, end, isTransient, tf_res, LM, dec);
tell = ec_tell(dec);
spread_decision = SPREAD_NORMAL;
@@ -849,7 +920,7 @@
dynalloc_logp = 6;
total_bits<<=BITRES;
tell = ec_tell_frac(dec);
- for (i=st->start;i<st->end;i++)
+ for (i=start;i<end;i++)
{
int width, quanta;
int dynalloc_loop_logp;
@@ -888,21 +959,28 @@
ALLOC(pulses, nbEBands, int);
ALLOC(fine_priority, nbEBands, int);
- codedBands = compute_allocation(mode, st->start, st->end, offsets, cap,
+ codedBands = compute_allocation(mode, start, end, offsets, cap,
alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses,
fine_quant, fine_priority, C, LM, dec, 0, 0, 0);
- unquant_fine_energy(mode, st->start, st->end, oldBandE, fine_quant, dec, C);
+ unquant_fine_energy(mode, start, end, oldBandE, fine_quant, dec, C);
+
+ c=0; do {
+ OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2);
+ } while (++c<CC);
/* Decode fixed codebook */
ALLOC(collapse_masks, C*nbEBands, unsigned char);
- /**< Interleaved normalised MDCTs */
- if (FREQ_X_BUF_SIZE >= C*N)
- X = s_X;
- else
- ALLOC(X, C*N, celt_norm);
- quant_all_bands(0, mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,
+#ifdef NORM_ALIASING_HACK
+ /* This is an ugly hack that breaks aliasing rules and would be easily broken,
+ but it saves almost 4kB of stack. */
+ X = (celt_norm*)(out_syn[CC-1]+overlap/2);
+#else
+ ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */
+#endif
+
+ quant_all_bands(0, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks,
NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res,
len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng);
@@ -911,58 +989,20 @@
anti_collapse_on = ec_dec_bits(dec, 1);
}
- unquant_energy_finalise(mode, st->start, st->end, oldBandE,
+ unquant_energy_finalise(mode, start, end, oldBandE,
fine_quant, fine_priority, len*8-ec_tell(dec), dec, C);
if (anti_collapse_on)
anti_collapse(mode, X, collapse_masks, LM, C, N,
- st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
-
- /**< Interleaved signal MDCTs */
- if (FREQ_X_BUF_SIZE >= IMAX(CC,C)*N)
- freq = s_freq;
- else
- ALLOC(freq, IMAX(CC,C)*N, celt_sig);
+ start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
if (silence)
{
for (i=0;i<C*nbEBands;i++)
oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
- for (i=0;i<C*N;i++)
- freq[i] = 0;
- } else {
- /* Synthesis */
- denormalise_bands(mode, X, freq, oldBandE, st->start, effEnd, C, M);
- }
- c=0; do {
- OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2);
- } while (++c<CC);
-
- c=0; do {
- int bound = M*eBands[effEnd];
- if (st->downsample!=1)
- bound = IMIN(bound, N/st->downsample);
- for (i=bound;i<N;i++)
- freq[c*N+i] = 0;
- } while (++c<C);
-
- c=0; do {
- out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
- } while (++c<CC);
-
- if (CC==2&&C==1)
- {
- for (i=0;i<N;i++)
- freq[N+i] = freq[i];
- }
- if (CC==1&&C==2)
- {
- for (i=0;i<N;i++)
- freq[i] = HALF32(ADD32(freq[i],freq[N+i]));
}
- /* Compute inverse MDCTs */
- compute_inv_mdcts(mode, shortBlocks, freq, out_syn, CC, LM);
+ celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd, C, CC, isTransient, LM, st->downsample, silence);
c=0; do {
st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD);
@@ -989,18 +1029,14 @@
st->postfilter_tapset_old = st->postfilter_tapset;
}
- if (C==1) {
- for (i=0;i<nbEBands;i++)
- oldBandE[nbEBands+i]=oldBandE[i];
- }
+ if (C==1)
+ OPUS_COPY(&oldBandE[nbEBands], oldBandE, nbEBands);
/* In case start or end were to change */
if (!isTransient)
{
- for (i=0;i<2*nbEBands;i++)
- oldLogE2[i] = oldLogE[i];
- for (i=0;i<2*nbEBands;i++)
- oldLogE[i] = oldBandE[i];
+ OPUS_COPY(oldLogE2, oldLogE, 2*nbEBands);
+ OPUS_COPY(oldLogE, oldBandE, 2*nbEBands);
for (i=0;i<2*nbEBands;i++)
backgroundLogE[i] = MIN16(backgroundLogE[i] + M*QCONST16(0.001f,DB_SHIFT), oldBandE[i]);
} else {
@@ -1009,12 +1045,12 @@
}
c=0; do
{
- for (i=0;i<st->start;i++)
+ for (i=0;i<start;i++)
{
oldBandE[c*nbEBands+i]=0;
oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
}
- for (i=st->end;i<nbEBands;i++)
+ for (i=end;i<nbEBands;i++)
{
oldBandE[c*nbEBands+i]=0;
oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
@@ -1022,8 +1058,7 @@
} while (++c<2);
st->rng = dec->rng;
- /* We reuse freq[] as scratch space for the de-emphasis */
- deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, freq);
+ deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum);
st->loss_count = 0;
RESTORE_STACK;
if (ec_tell(dec) > 8*len)
@@ -1039,7 +1074,7 @@
#ifdef FIXED_POINT
int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
{
- return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL);
+ return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0);
}
#ifndef DISABLE_FLOAT_API
@@ -1056,7 +1091,7 @@
N = frame_size;
ALLOC(out, C*N, opus_int16);
- ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL);
+ ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0);
if (ret>0)
for (j=0;j<C*ret;j++)
pcm[j]=out[j]*(1.f/32768.f);
@@ -1070,7 +1105,7 @@
int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
{
- return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL);
+ return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0);
}
int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
@@ -1086,7 +1121,7 @@
N = frame_size;
ALLOC(out, C*N, celt_sig);
- ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL);
+ ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0);
if (ret>0)
for (j=0;j<C*ret;j++)
diff --git a/lib/rbcodec/codecs/libopus/celt/cwrs.c b/lib/rbcodec/codecs/libopus/celt/cwrs.c
index 03b8698..921100f 100644
--- a/lib/rbcodec/codecs/libopus/celt/cwrs.c
+++ b/lib/rbcodec/codecs/libopus/celt/cwrs.c
@@ -460,10 +460,12 @@
ec_enc_uint(_enc,icwrs(_n,_y),CELT_PVQ_V(_n,_k));
}
-static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
+static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
opus_uint32 p;
int s;
int k0;
+ opus_int16 val;
+ opus_val32 yy=0;
celt_assert(_k>0);
celt_assert(_n>1);
while(_n>2){
@@ -487,7 +489,9 @@
}
else for(p=row[_k];p>_i;p=row[_k])_k--;
_i-=p;
- *_y++=(k0-_k+s)^s;
+ val=(k0-_k+s)^s;
+ *_y++=val;
+ yy=MAC16_16(yy,val,val);
}
/*Lots of dimensions case:*/
else{
@@ -507,7 +511,9 @@
do p=CELT_PVQ_U_ROW[--_k][_n];
while(p>_i);
_i-=p;
- *_y++=(k0-_k+s)^s;
+ val=(k0-_k+s)^s;
+ *_y++=val;
+ yy=MAC16_16(yy,val,val);
}
}
_n--;
@@ -519,14 +525,19 @@
k0=_k;
_k=(_i+1)>>1;
if(_k)_i-=2*_k-1;
- *_y++=(k0-_k+s)^s;
+ val=(k0-_k+s)^s;
+ *_y++=val;
+ yy=MAC16_16(yy,val,val);
/*_n==1*/
s=-(int)_i;
- *_y=(_k+s)^s;
+ val=(_k+s)^s;
+ *_y=val;
+ yy=MAC16_16(yy,val,val);
+ return yy;
}
-void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
- cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y);
+opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
+ return cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y);
}
#else /* SMALL_FOOTPRINT */
@@ -591,8 +602,10 @@
_y: Returns the vector of pulses.
_u: Must contain entries [0..._k+1] of row _n of U() on input.
Its contents will be destructively modified.*/
-static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){
+static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){
int j;
+ opus_int16 val;
+ opus_val32 yy=0;
celt_assert(_n>0);
j=0;
do{
@@ -607,10 +620,13 @@
while(p>_i)p=_u[--_k];
_i-=p;
yj-=_k;
- _y[j]=(yj+s)^s;
+ val=(yj+s)^s;
+ _y[j]=val;
+ yy=MAC16_16(yy,val,val);
uprev(_u,_k+2,0);
}
while(++j<_n);
+ return yy;
}
/*Returns the index of the given combination of K elements chosen from a set
@@ -685,13 +701,15 @@
RESTORE_STACK;
}
-void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
+opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
VARDECL(opus_uint32,u);
+ int ret;
SAVE_STACK;
celt_assert(_k>0);
ALLOC(u,_k+2U,opus_uint32);
- cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);
+ ret = cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);
RESTORE_STACK;
+ return ret;
}
#endif /* SMALL_FOOTPRINT */
diff --git a/lib/rbcodec/codecs/libopus/celt/cwrs.h b/lib/rbcodec/codecs/libopus/celt/cwrs.h
index 7dfbd07..7cd4717 100644
--- a/lib/rbcodec/codecs/libopus/celt/cwrs.h
+++ b/lib/rbcodec/codecs/libopus/celt/cwrs.h
@@ -43,6 +43,6 @@
void encode_pulses(const int *_y, int N, int K, ec_enc *enc);
-void decode_pulses(int *_y, int N, int K, ec_dec *dec);
+opus_val32 decode_pulses(int *_y, int N, int K, ec_dec *dec);
#endif /* CWRS_H */
diff --git a/lib/rbcodec/codecs/libopus/celt/entcode.c b/lib/rbcodec/codecs/libopus/celt/entcode.c
index fa5d7c7..461a36d 100644
--- a/lib/rbcodec/codecs/libopus/celt/entcode.c
+++ b/lib/rbcodec/codecs/libopus/celt/entcode.c
@@ -62,6 +62,27 @@
}
#endif
+#if 1
+/* This is a faster version of ec_tell_frac() that takes advantage
+ of the low (1/8 bit) resolution to use just a linear function
+ followed by a lookup to determine the exact transition thresholds. */
+opus_uint32 ec_tell_frac(ec_ctx *_this){
+ static const unsigned correction[8] =
+ {35733, 38967, 42495, 46340,
+ 50535, 55109, 60097, 65535};
+ opus_uint32 nbits;
+ opus_uint32 r;
+ int l;
+ unsigned b;
+ nbits=_this->nbits_total<<BITRES;
+ l=EC_ILOG(_this->rng);
+ r=_this->rng>>(l-16);
+ b = (r>>12)-8;
+ b += r>correction[b];
+ l = (l<<3)+b;
+ return nbits-l;
+}
+#else
opus_uint32 ec_tell_frac(ec_ctx *_this){
opus_uint32 nbits;
opus_uint32 r;
@@ -91,3 +112,42 @@
}
return nbits-l;
}
+#endif
+
+#ifdef USE_SMALL_DIV_TABLE
+/* Result of 2^32/(2*i+1), except for i=0. */
+const opus_uint32 SMALL_DIV_TABLE[129] ICONST_ATTR = {
+ 0xFFFFFFFF, 0x55555555, 0x33333333, 0x24924924,
+ 0x1C71C71C, 0x1745D174, 0x13B13B13, 0x11111111,
+ 0x0F0F0F0F, 0x0D79435E, 0x0C30C30C, 0x0B21642C,
+ 0x0A3D70A3, 0x097B425E, 0x08D3DCB0, 0x08421084,
+ 0x07C1F07C, 0x07507507, 0x06EB3E45, 0x06906906,
+ 0x063E7063, 0x05F417D0, 0x05B05B05, 0x0572620A,
+ 0x05397829, 0x05050505, 0x04D4873E, 0x04A7904A,
+ 0x047DC11F, 0x0456C797, 0x04325C53, 0x04104104,
+ 0x03F03F03, 0x03D22635, 0x03B5CC0E, 0x039B0AD1,
+ 0x0381C0E0, 0x0369D036, 0x03531DEC, 0x033D91D2,
+ 0x0329161F, 0x03159721, 0x03030303, 0x02F14990,
+ 0x02E05C0B, 0x02D02D02, 0x02C0B02C, 0x02B1DA46,
+ 0x02A3A0FD, 0x0295FAD4, 0x0288DF0C, 0x027C4597,
+ 0x02702702, 0x02647C69, 0x02593F69, 0x024E6A17,
+ 0x0243F6F0, 0x0239E0D5, 0x02302302, 0x0226B902,
+ 0x021D9EAD, 0x0214D021, 0x020C49BA, 0x02040810,
+ 0x01FC07F0, 0x01F44659, 0x01ECC07B, 0x01E573AC,
+ 0x01DE5D6E, 0x01D77B65, 0x01D0CB58, 0x01CA4B30,
+ 0x01C3F8F0, 0x01BDD2B8, 0x01B7D6C3, 0x01B20364,
+ 0x01AC5701, 0x01A6D01A, 0x01A16D3F, 0x019C2D14,
+ 0x01970E4F, 0x01920FB4, 0x018D3018, 0x01886E5F,
+ 0x0183C977, 0x017F405F, 0x017AD220, 0x01767DCE,
+ 0x01724287, 0x016E1F76, 0x016A13CD, 0x01661EC6,
+ 0x01623FA7, 0x015E75BB, 0x015AC056, 0x01571ED3,
+ 0x01539094, 0x01501501, 0x014CAB88, 0x0149539E,
+ 0x01460CBC, 0x0142D662, 0x013FB013, 0x013C995A,
+ 0x013991C2, 0x013698DF, 0x0133AE45, 0x0130D190,
+ 0x012E025C, 0x012B404A, 0x01288B01, 0x0125E227,
+ 0x01234567, 0x0120B470, 0x011E2EF3, 0x011BB4A4,
+ 0x01194538, 0x0116E068, 0x011485F0, 0x0112358E,
+ 0x010FEF01, 0x010DB20A, 0x010B7E6E, 0x010953F3,
+ 0x01073260, 0x0105197F, 0x0103091B, 0x01010101
+};
+#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/entcode.h b/lib/rbcodec/codecs/libopus/celt/entcode.h
index dd13e49..13d6c84 100644
--- a/lib/rbcodec/codecs/libopus/celt/entcode.h
+++ b/lib/rbcodec/codecs/libopus/celt/entcode.h
@@ -34,6 +34,12 @@
# include <stddef.h>
# include "ecintrin.h"
+extern const opus_uint32 SMALL_DIV_TABLE[129];
+
+#ifdef OPUS_ARM_ASM
+#define USE_SMALL_DIV_TABLE
+#endif
+
/*OPT: ec_window must be at least 32 bits, but if you have fast arithmetic on a
larger type, you can speed up the decoder by using it here.*/
typedef opus_uint32 ec_window;
@@ -114,4 +120,33 @@
rounding error is in the positive direction).*/
opus_uint32 ec_tell_frac(ec_ctx *_this);
+/* Tested exhaustively for all n and for 1<=d<=256 */
+static OPUS_INLINE opus_uint32 celt_udiv(opus_uint32 n, opus_uint32 d) {
+ celt_assert(d>0);
+#ifdef USE_SMALL_DIV_TABLE
+ if (d>256)
+ return n/d;
+ else {
+ opus_uint32 t, q;
+ t = EC_ILOG(d&-d);
+ q = (opus_uint64)SMALL_DIV_TABLE[d>>t]*(n>>(t-1))>>32;
+ return q+(n-q*d >= d);
+ }
+#else
+ return n/d;
+#endif
+}
+
+static OPUS_INLINE opus_int32 celt_sudiv(opus_int32 n, opus_int32 d) {
+ celt_assert(d>0);
+#ifdef USE_SMALL_DIV_TABLE
+ if (n<0)
+ return -(opus_int32)celt_udiv(-n, d);
+ else
+ return celt_udiv(n, d);
+#else
+ return n/d;
+#endif
+}
+
#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/entdec.c b/lib/rbcodec/codecs/libopus/celt/entdec.c
index 3c26468..0b3433e 100644
--- a/lib/rbcodec/codecs/libopus/celt/entdec.c
+++ b/lib/rbcodec/codecs/libopus/celt/entdec.c
@@ -138,7 +138,7 @@
unsigned ec_decode(ec_dec *_this,unsigned _ft){
unsigned s;
- _this->ext=_this->rng/_ft;
+ _this->ext=celt_udiv(_this->rng,_ft);
s=(unsigned)(_this->val/_this->ext);
return _ft-EC_MINI(s+1,_ft);
}
diff --git a/lib/rbcodec/codecs/libopus/celt/entenc.c b/lib/rbcodec/codecs/libopus/celt/entenc.c
index a7e34ec..271e4d3 100644
--- a/lib/rbcodec/codecs/libopus/celt/entenc.c
+++ b/lib/rbcodec/codecs/libopus/celt/entenc.c
@@ -127,7 +127,7 @@
void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft){
opus_uint32 r;
- r=_this->rng/_ft;
+ r=celt_udiv(_this->rng,_ft);
if(_fl>0){
_this->val+=_this->rng-IMUL32(r,(_ft-_fl));
_this->rng=IMUL32(r,(_fh-_fl));
diff --git a/lib/rbcodec/codecs/libopus/celt/fixed_generic.h b/lib/rbcodec/codecs/libopus/celt/fixed_generic.h
index ecf018a..ac67d37 100644
--- a/lib/rbcodec/codecs/libopus/celt/fixed_generic.h
+++ b/lib/rbcodec/codecs/libopus/celt/fixed_generic.h
@@ -113,7 +113,11 @@
/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
b must fit in 31 bits.
Result fits in 32 bits. */
-#define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
+#define MAC16_32_Q15(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
+
+/** 16x32 multiplication, followed by a 16-bit shift right and 32-bit add.
+ Result fits in 32 bits */
+#define MAC16_32_Q16(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16)))
#define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11))
#define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11))
@@ -131,4 +135,17 @@
/** Divide a 32-bit value by a 32-bit value. Result fits in 32 bits */
#define DIV32(a,b) (((opus_val32)(a))/((opus_val32)(b)))
+#if defined(MIPSr1_ASM)
+#include "mips/fixed_generic_mipsr1.h"
+#endif
+
+static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x)
+{
+ x = PSHR32(x, SIG_SHIFT);
+ x = MAX32(x, -32768);
+ x = MIN32(x, 32767);
+ return EXTRACT16(x);
+}
+#define SIG2WORD16(x) (SIG2WORD16_generic(x))
+
#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/kiss_fft.c b/lib/rbcodec/codecs/libopus/celt/kiss_fft.c
index e2b8f3b..833ef5a 100644
--- a/lib/rbcodec/codecs/libopus/celt/kiss_fft.c
+++ b/lib/rbcodec/codecs/libopus/celt/kiss_fft.c
@@ -45,73 +45,62 @@
complex numbers. It also delares the kf_ internal functions.
*/
-#if 0
static void kf_bfly2(
kiss_fft_cpx * Fout,
- const size_t fstride,
- const kiss_fft_state *st,
int m,
- int N,
- int mm
+ int N
)
{
kiss_fft_cpx * Fout2;
- const kiss_twiddle_cpx * tw1;
- int i,j;
- kiss_fft_cpx * Fout_beg = Fout;
- for (i=0;i<N;i++)
+ int i;
+ (void)m;
+#ifdef CUSTOM_MODES
+ if (m==1)
{
- Fout = Fout_beg + i*mm;
- Fout2 = Fout + m;
- tw1 = st->twiddles;
- for(j=0;j<m;j++)
+ celt_assert(m==1);
+ for (i=0;i<N;i++)
{
kiss_fft_cpx t;
- Fout->r = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1);
- Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1);
- C_MUL (t, *Fout2 , *tw1);
- tw1 += fstride;
+ Fout2 = Fout + 1;
+ t = *Fout2;
C_SUB( *Fout2 , *Fout , t );
C_ADDTO( *Fout , t );
- ++Fout2;
- ++Fout;
+ Fout += 2;
}
- }
-}
+ } else
#endif
-
-static void ki_bfly2(
- kiss_fft_cpx * Fout,
- const size_t fstride,
- const kiss_fft_state *st,
- int m,
- int N,
- int mm
- )
-{
- kiss_fft_cpx * Fout2;
- const kiss_twiddle_cpx * tw1;
- kiss_fft_cpx t;
- int i,j;
- kiss_fft_cpx * Fout_beg = Fout;
- for (i=0;i<N;i++)
{
- Fout = Fout_beg + i*mm;
- Fout2 = Fout + m;
- tw1 = st->twiddles;
- for(j=0;j<m;j++)
+ opus_val16 tw;
+ tw = QCONST16(0.7071067812f, 15);
+ /* We know that m==4 here because the radix-2 is just after a radix-4 */
+ celt_assert(m==4);
+ for (i=0;i<N;i++)
{
- C_MULC (t, *Fout2 , *tw1);
- tw1 += fstride;
- C_SUB( *Fout2 , *Fout , t );
- C_ADDTO( *Fout , t );
- ++Fout2;
- ++Fout;
+ kiss_fft_cpx t;
+ Fout2 = Fout + 4;
+ t = Fout2[0];
+ C_SUB( Fout2[0] , Fout[0] , t );
+ C_ADDTO( Fout[0] , t );
+
+ t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw);
+ t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw);
+ C_SUB( Fout2[1] , Fout[1] , t );
+ C_ADDTO( Fout[1] , t );
+
+ t.r = Fout2[2].i;
+ t.i = -Fout2[2].r;
+ C_SUB( Fout2[2] , Fout[2] , t );
+ C_ADDTO( Fout[2] , t );
+
+ t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw);
+ t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw);
+ C_SUB( Fout2[3] , Fout[3] , t );
+ C_ADDTO( Fout[3] , t );
+ Fout += 8;
}
}
}
-#if 0
static void kf_bfly4(
kiss_fft_cpx * Fout,
const size_t fstride,
@@ -121,93 +110,69 @@
int mm
)
{
- const kiss_twiddle_cpx *tw1,*tw2,*tw3;
- kiss_fft_cpx scratch[6];
- const size_t m2=2*m;
- const size_t m3=3*m;
- int i, j;
+ int i;
- kiss_fft_cpx * Fout_beg = Fout;
- for (i=0;i<N;i++)
+ if (m==1)
{
- Fout = Fout_beg + i*mm;
- tw3 = tw2 = tw1 = st->twiddles;
- for (j=0;j<m;j++)
+ /* Degenerate case where all the twiddles are 1. */
+ for (i=0;i<N;i++)
{
- C_MUL4(scratch[0],Fout[m] , *tw1 );
- C_MUL4(scratch[1],Fout[m2] , *tw2 );
- C_MUL4(scratch[2],Fout[m3] , *tw3 );
+ kiss_fft_cpx scratch0, scratch1;
- Fout->r = PSHR32(Fout->r, 2);
- Fout->i = PSHR32(Fout->i, 2);
- C_SUB( scratch[5] , *Fout, scratch[1] );
- C_ADDTO(*Fout, scratch[1]);
- C_ADD( scratch[3] , scratch[0] , scratch[2] );
- C_SUB( scratch[4] , scratch[0] , scratch[2] );
- C_SUB( Fout[m2], *Fout, scratch[3] );
- tw1 += fstride;
- tw2 += fstride*2;
- tw3 += fstride*3;
- C_ADDTO( *Fout , scratch[3] );
+ C_SUB( scratch0 , *Fout, Fout[2] );
+ C_ADDTO(*Fout, Fout[2]);
+ C_ADD( scratch1 , Fout[1] , Fout[3] );
+ C_SUB( Fout[2], *Fout, scratch1 );
+ C_ADDTO( *Fout , scratch1 );
+ C_SUB( scratch1 , Fout[1] , Fout[3] );
- Fout[m].r = scratch[5].r + scratch[4].i;
- Fout[m].i = scratch[5].i - scratch[4].r;
- Fout[m3].r = scratch[5].r - scratch[4].i;
- Fout[m3].i = scratch[5].i + scratch[4].r;
- ++Fout;
+ Fout[1].r = scratch0.r + scratch1.i;
+ Fout[1].i = scratch0.i - scratch1.r;
+ Fout[3].r = scratch0.r - scratch1.i;
+ Fout[3].i = scratch0.i + scratch1.r;
+ Fout+=4;
+ }
+ } else {
+ int j;
+ kiss_fft_cpx scratch[6];
+ const kiss_twiddle_cpx *tw1,*tw2,*tw3;
+ const int m2=2*m;
+ const int m3=3*m;
+ kiss_fft_cpx * Fout_beg = Fout;
+ for (i=0;i<N;i++)
+ {
+ Fout = Fout_beg + i*mm;
+ tw3 = tw2 = tw1 = st->twiddles;
+ /* m is guaranteed to be a multiple of 4. */
+ for (j=0;j<m;j++)
+ {
+ C_MUL(scratch[0],Fout[m] , *tw1 );
+ C_MUL(scratch[1],Fout[m2] , *tw2 );
+ C_MUL(scratch[2],Fout[m3] , *tw3 );
+
+ C_SUB( scratch[5] , *Fout, scratch[1] );
+ C_ADDTO(*Fout, scratch[1]);
+ C_ADD( scratch[3] , scratch[0] , scratch[2] );
+ C_SUB( scratch[4] , scratch[0] , scratch[2] );
+ C_SUB( Fout[m2], *Fout, scratch[3] );
+ tw1 += fstride;
+ tw2 += fstride*2;
+ tw3 += fstride*3;
+ C_ADDTO( *Fout , scratch[3] );
+
+ Fout[m].r = scratch[5].r + scratch[4].i;
+ Fout[m].i = scratch[5].i - scratch[4].r;
+ Fout[m3].r = scratch[5].r - scratch[4].i;
+ Fout[m3].i = scratch[5].i + scratch[4].r;
+ ++Fout;
+ }
}
}
}
-#endif
-static void ki_bfly4(
- kiss_fft_cpx * Fout,
- const size_t fstride,
- const kiss_fft_state *st,
- int m,
- int N,
- int mm
- )
-{
- const kiss_twiddle_cpx *tw1,*tw2,*tw3;
- kiss_fft_cpx scratch[6];
- const size_t m2=2*m;
- const size_t m3=3*m;
- int i, j;
-
- kiss_fft_cpx * Fout_beg = Fout;
- for (i=0;i<N;i++)
- {
- Fout = Fout_beg + i*mm;
- tw3 = tw2 = tw1 = st->twiddles;
- for (j=0;j<m;j++)
- {
- C_MULC(scratch[0],Fout[m] , *tw1 );
- C_MULC(scratch[1],Fout[m2] , *tw2 );
- C_MULC(scratch[2],Fout[m3] , *tw3 );
-
- C_SUB( scratch[5] , *Fout, scratch[1] );
- C_ADDTO(*Fout, scratch[1]);
- C_ADD( scratch[3] , scratch[0] , scratch[2] );
- C_SUB( scratch[4] , scratch[0] , scratch[2] );
- C_SUB( Fout[m2], *Fout, scratch[3] );
- tw1 += fstride;
- tw2 += fstride*2;
- tw3 += fstride*3;
- C_ADDTO( *Fout , scratch[3] );
-
- Fout[m].r = scratch[5].r - scratch[4].i;
- Fout[m].i = scratch[5].i + scratch[4].r;
- Fout[m3].r = scratch[5].r + scratch[4].i;
- Fout[m3].i = scratch[5].i - scratch[4].r;
- ++Fout;
- }
- }
-}
#ifndef RADIX_TWO_ONLY
-#if 0
static void kf_bfly3(
kiss_fft_cpx * Fout,
const size_t fstride,
@@ -225,14 +190,19 @@
kiss_twiddle_cpx epi3;
kiss_fft_cpx * Fout_beg = Fout;
+#ifdef FIXED_POINT
+ epi3.r = -16384;
+ epi3.i = -28378;
+#else
epi3 = st->twiddles[fstride*m];
+#endif
for (i=0;i<N;i++)
{
Fout = Fout_beg + i*mm;
tw1=tw2=st->twiddles;
+ /* For non-custom modes, m is guaranteed to be a multiple of 4. */
k=m;
do {
- C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
C_MUL(scratch[1],Fout[m] , *tw1);
C_MUL(scratch[2],Fout[m2] , *tw2);
@@ -259,59 +229,9 @@
} while(--k);
}
}
-#endif
-static void ki_bfly3(
- kiss_fft_cpx * Fout,
- const size_t fstride,
- const kiss_fft_state *st,
- int m,
- int N,
- int mm
- )
-{
- int i, k;
- const size_t m2 = 2*m;
- const kiss_twiddle_cpx *tw1,*tw2;
- kiss_fft_cpx scratch[5];
- kiss_twiddle_cpx epi3;
- kiss_fft_cpx * Fout_beg = Fout;
- epi3 = st->twiddles[fstride*m];
- for (i=0;i<N;i++)
- {
- Fout = Fout_beg + i*mm;
- tw1=tw2=st->twiddles;
- k=m;
- do{
-
- C_MULC(scratch[1],Fout[m] , *tw1);
- C_MULC(scratch[2],Fout[m2] , *tw2);
-
- C_ADD(scratch[3],scratch[1],scratch[2]);
- C_SUB(scratch[0],scratch[1],scratch[2]);
- tw1 += fstride;
- tw2 += fstride*2;
-
- Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
- Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
-
- C_MULBYSCALAR( scratch[0] , -epi3.i );
-
- C_ADDTO(*Fout,scratch[3]);
-
- Fout[m2].r = Fout[m].r + scratch[0].i;
- Fout[m2].i = Fout[m].i - scratch[0].r;
-
- Fout[m].r -= scratch[0].i;
- Fout[m].i += scratch[0].r;
-
- ++Fout;
- }while(--k);
- }
-}
-
-#if 0
+#ifndef OVERRIDE_kf_bfly5
static void kf_bfly5(
kiss_fft_cpx * Fout,
const size_t fstride,
@@ -324,13 +244,19 @@
kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
int i, u;
kiss_fft_cpx scratch[13];
- const kiss_twiddle_cpx * twiddles = st->twiddles;
const kiss_twiddle_cpx *tw;
kiss_twiddle_cpx ya,yb;
kiss_fft_cpx * Fout_beg = Fout;
- ya = twiddles[fstride*m];
- yb = twiddles[fstride*2*m];
+#ifdef FIXED_POINT
+ ya.r = 10126;
+ ya.i = -31164;
+ yb.r = -26510;
+ yb.i = -19261;
+#else
+ ya = st->twiddles[fstride*m];
+ yb = st->twiddles[fstride*2*m];
+#endif
tw=st->twiddles;
for (i=0;i<N;i++)
@@ -342,8 +268,8 @@
Fout3=Fout0+3*m;
Fout4=Fout0+4*m;
+ /* For non-custom modes, m is guaranteed to be a multiple of 4. */
for ( u=0; u<m; ++u ) {
- C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
scratch[0] = *Fout0;
C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
@@ -380,75 +306,8 @@
}
}
}
-#endif
+#endif /* OVERRIDE_kf_bfly5 */
-static void ki_bfly5(
- kiss_fft_cpx * Fout,
- const size_t fstride,
- const kiss_fft_state *st,
- int m,
- int N,
- int mm
- )
-{
- kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
- int i, u;
- kiss_fft_cpx scratch[13];
- const kiss_twiddle_cpx * twiddles = st->twiddles;
- const kiss_twiddle_cpx *tw;
- kiss_twiddle_cpx ya,yb;
- kiss_fft_cpx * Fout_beg = Fout;
-
- ya = twiddles[fstride*m];
- yb = twiddles[fstride*2*m];
- tw=st->twiddles;
-
- for (i=0;i<N;i++)
- {
- Fout = Fout_beg + i*mm;
- Fout0=Fout;
- Fout1=Fout0+m;
- Fout2=Fout0+2*m;
- Fout3=Fout0+3*m;
- Fout4=Fout0+4*m;
-
- for ( u=0; u<m; ++u ) {
- scratch[0] = *Fout0;
-
- C_MULC(scratch[1] ,*Fout1, tw[u*fstride]);
- C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);
- C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);
- C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);
-
- C_ADD( scratch[7],scratch[1],scratch[4]);
- C_SUB( scratch[10],scratch[1],scratch[4]);
- C_ADD( scratch[8],scratch[2],scratch[3]);
- C_SUB( scratch[9],scratch[2],scratch[3]);
-
- Fout0->r += scratch[7].r + scratch[8].r;
- Fout0->i += scratch[7].i + scratch[8].i;
-
- scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
- scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
-
- scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);
- scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);
-
- C_SUB(*Fout1,scratch[5],scratch[6]);
- C_ADD(*Fout4,scratch[5],scratch[6]);
-
- scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
- scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
- scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);
- scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);
-
- C_ADD(*Fout2,scratch[11],scratch[12]);
- C_SUB(*Fout3,scratch[11],scratch[12]);
-
- ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
- }
- }
-}
#endif
@@ -496,6 +355,9 @@
int kf_factor(int n,opus_int16 * facbuf)
{
int p=4;
+ int i;
+ int stages=0;
+ int nbak = n;
/*factor out powers of 4, powers of 2, then any remaining primes */
do {
@@ -517,9 +379,30 @@
{
return 0;
}
- *facbuf++ = p;
- *facbuf++ = n;
+ facbuf[2*stages] = p;
+ if (p==2 && stages > 1)
+ {
+ facbuf[2*stages] = 4;
+ facbuf[2] = 2;
+ }
+ stages++;
} while (n > 1);
+ n = nbak;
+ /* Reverse the order to get the radix 4 at the end, so we can use the
+ fast degenerate case. It turns out that reversing the order also
+ improves the noise behaviour. */
+ for (i=0;i<stages/2;i++)
+ {
+ int tmp;
+ tmp = facbuf[2*i];
+ facbuf[2*i] = facbuf[2*(stages-i-1)];
+ facbuf[2*(stages-i-1)] = tmp;
+ }
+ for (i=0;i<stages;i++)
+ {
+ n /= facbuf[2*i];
+ facbuf[2*i+1] = n;
+ }
return 1;
}
@@ -563,14 +446,20 @@
kiss_twiddle_cpx *twiddles;
st->nfft=nfft;
-#ifndef FIXED_POINT
+#ifdef FIXED_POINT
+ st->scale_shift = celt_ilog2(st->nfft);
+ if (st->nfft == 1<<st->scale_shift)
+ st->scale = Q15ONE;
+ else
+ st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
+#else
st->scale = 1.f/nfft;
#endif
if (base != NULL)
{
st->twiddles = base->twiddles;
st->shift = 0;
- while (nfft<<st->shift != base->nfft && st->shift < 32)
+ while (st->shift < 32 && nfft<<st->shift != base->nfft)
st->shift++;
if (st->shift>=32)
goto fail;
@@ -614,8 +503,7 @@
#endif /* CUSTOM_MODES */
-#if 0
-void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
{
int m2, m;
int p;
@@ -627,17 +515,6 @@
/* st->shift can be -1 */
shift = st->shift>0 ? st->shift : 0;
- celt_assert2 (fin != fout, "In-place FFT not supported");
- /* Bit-reverse the input */
- for (i=0;i<st->nfft;i++)
- {
- fout[st->bitrev[i]] = fin[i];
-#ifndef FIXED_POINT
- fout[st->bitrev[i]].r *= st->scale;
- fout[st->bitrev[i]].i *= st->scale;
-#endif
- }
-
fstride[0] = 1;
L=0;
do {
@@ -656,7 +533,7 @@
switch (st->factors[2*i])
{
case 2:
- kf_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+ kf_bfly2(fout, m, fstride[i]);
break;
case 4:
kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
@@ -673,57 +550,44 @@
m = m2;
}
}
+
+#if 0
+void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+{
+ int i;
+ opus_val16 scale;
+#ifdef FIXED_POINT
+ /* Allows us to scale with MULT16_32_Q16(), which is faster than
+ MULT16_32_Q15() on ARM. */
+ int scale_shift = st->scale_shift-1;
+#endif
+ scale = st->scale;
+
+ celt_assert2 (fin != fout, "In-place FFT not supported");
+ /* Bit-reverse the input */
+ for (i=0;i<st->nfft;i++)
+ {
+ kiss_fft_cpx x = fin[i];
+ fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift);
+ fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift);
+ }
+ opus_fft_impl(st, fout);
+}
#endif
+
+#ifdef TEST_UNIT_DFT_C /* opus_ifft() is only compiled when TEST_UNIT_DFT_C is defined */
void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
{
- int m2, m;
- int p;
- int L;
- int fstride[MAXFACTORS];
int i;
- int shift;
-
- /* st->shift can be -1 */
- shift = st->shift>0 ? st->shift : 0;
celt_assert2 (fin != fout, "In-place FFT not supported");
/* Bit-reverse the input */
for (i=0;i<st->nfft;i++)
fout[st->bitrev[i]] = fin[i];
-
- fstride[0] = 1;
- L=0;
- do {
- p = st->factors[2*L];
- m = st->factors[2*L+1];
- fstride[L+1] = fstride[L]*p;
- L++;
- } while(m!=1);
- m = st->factors[2*L-1];
- for (i=L-1;i>=0;i--)
- {
- if (i!=0)
- m2 = st->factors[2*i-1];
- else
- m2 = 1;
- switch (st->factors[2*i])
- {
- case 2:
- ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
- break;
- case 4:
- ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
- break;
-#ifndef RADIX_TWO_ONLY
- case 3:
- ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
- break;
- case 5:
- ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
- break;
-#endif
- }
- m = m2;
- }
+ for (i=0;i<st->nfft;i++) /* conjugate the bit-reversed input (negate every imaginary part) */
+ fout[i].i = -fout[i].i;
+ opus_fft_impl(st, fout); /* forward transform on the conjugated data, operating on fout itself */
+ for (i=0;i<st->nfft;i++) /* conjugate the result: conj(FFT(conj(x))) yields the inverse transform; no scale factor is applied in this function */
+ fout[i].i = -fout[i].i;
}
-
+#endif /* TEST_UNIT_DFT_C */
diff --git a/lib/rbcodec/codecs/libopus/celt/kiss_fft.h b/lib/rbcodec/codecs/libopus/celt/kiss_fft.h
index 66cf1f2..390b54d 100644
--- a/lib/rbcodec/codecs/libopus/celt/kiss_fft.h
+++ b/lib/rbcodec/codecs/libopus/celt/kiss_fft.h
@@ -79,8 +79,9 @@
typedef struct kiss_fft_state{
int nfft;
-#ifndef FIXED_POINT
- kiss_fft_scalar scale;
+ opus_val16 scale;
+#ifdef FIXED_POINT
+ int scale_shift;
#endif
int shift;
opus_int16 factors[2*MAXFACTORS];
@@ -128,14 +129,10 @@
f[k].r and f[k].i
* */
void opus_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
+void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
-#if defined(CPU_COLDFIRE)
-#define IFFT_ICODE ICODE_ATTR
-#else
-#define IFFT_ICODE
-#endif
-
-void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) IFFT_ICODE;
+void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
+void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
void opus_fft_free(const kiss_fft_state *cfg);
diff --git a/lib/rbcodec/codecs/libopus/celt/mdct.c b/lib/rbcodec/codecs/libopus/celt/mdct.c
index 72ea180..7fa8eaf 100644
--- a/lib/rbcodec/codecs/libopus/celt/mdct.c
+++ b/lib/rbcodec/codecs/libopus/celt/mdct.c
@@ -53,18 +53,20 @@
#include "mathops.h"
#include "stack_alloc.h"
+#if defined(MIPSr1_ASM)
+#include "mips/mdct_mipsr1.h"
+#endif
+
+
#ifdef CUSTOM_MODES
int clt_mdct_init(mdct_lookup *l,int N, int maxshift)
{
int i;
- int N4;
kiss_twiddle_scalar *trig;
-#if defined(FIXED_POINT)
+ int shift;
int N2=N>>1;
-#endif
l->n = N;
- N4 = N>>2;
l->maxshift = maxshift;
for (i=0;i<=maxshift;i++)
{
@@ -77,17 +79,28 @@
return 0;
#endif
}
- l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N4+1)*sizeof(kiss_twiddle_scalar));
+ l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N-(N2>>maxshift))*sizeof(kiss_twiddle_scalar));
if (l->trig==NULL)
return 0;
- /* We have enough points that sine isn't necessary */
+ for (shift=0;shift<=maxshift;shift++)
+ {
+ /* We have enough points that sine isn't necessary */
#if defined(FIXED_POINT)
- for (i=0;i<=N4;i++)
- trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N));
+#if 1
+ for (i=0;i<N2;i++)
+ trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2+16384),N));
#else
- for (i=0;i<=N4;i++)
- trig[i] = (kiss_twiddle_scalar)cos(2*PI*i/N);
+ for (i=0;i<N2;i++)
+ trig[i] = (kiss_twiddle_scalar)MAX32(-32767,MIN32(32767,floor(.5+32768*cos(2*M_PI*(i+.125)/N))));
#endif
+#else
+ for (i=0;i<N2;i++)
+ trig[i] = (kiss_twiddle_scalar)cos(2*PI*(i+.125)/N);
+#endif
+ trig += N2;
+ N2 >>= 1;
+ N >>= 1;
+ }
return 1;
}
@@ -103,27 +116,37 @@
#if 0
/* Forward MDCT trashes the input array */
+#ifndef OVERRIDE_clt_mdct_forward
void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window, int overlap, int shift, int stride)
{
int i;
int N, N2, N4;
- kiss_twiddle_scalar sine;
VARDECL(kiss_fft_scalar, f);
- VARDECL(kiss_fft_scalar, f2);
+ VARDECL(kiss_fft_cpx, f2);
+ const kiss_fft_state *st = l->kfft[shift];
+ const kiss_twiddle_scalar *trig;
+ opus_val16 scale;
+#ifdef FIXED_POINT
+ /* Allows us to scale with MULT16_32_Q16(), which is faster than
+ MULT16_32_Q15() on ARM. */
+ int scale_shift = st->scale_shift-1;
+#endif
SAVE_STACK;
+ scale = st->scale;
+
N = l->n;
- N >>= shift;
+ trig = l->trig;
+ for (i=0;i<shift;i++)
+ {
+ N >>= 1;
+ trig += N;
+ }
N2 = N>>1;
N4 = N>>2;
+
ALLOC(f, N2, kiss_fft_scalar);
- ALLOC(f2, N2, kiss_fft_scalar);
- /* sin(x) ~= x here */
-#ifdef FIXED_POINT
- sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
-#else
- sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;
-#endif
+ ALLOC(f2, N4, kiss_fft_cpx);
/* Consider the input to be composed of four blocks: [a, b, c, d] */
/* Window, shuffle, fold */
@@ -168,125 +191,131 @@
/* Pre-rotation */
{
kiss_fft_scalar * OPUS_RESTRICT yp = f;
- const kiss_twiddle_scalar *t = &l->trig[0];
+ const kiss_twiddle_scalar *t = &trig[0];
for(i=0;i<N4;i++)
{
+ kiss_fft_cpx yc;
+ kiss_twiddle_scalar t0, t1;
kiss_fft_scalar re, im, yr, yi;
- re = yp[0];
- im = yp[1];
- yr = -S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]);
- yi = -S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]);
- /* works because the cos is nearly one */
- *yp++ = yr + S_MUL(yi,sine);
- *yp++ = yi - S_MUL(yr,sine);
+ t0 = t[i];
+ t1 = t[N4+i];
+ re = *yp++;
+ im = *yp++;
+ yr = S_MUL(re,t0) - S_MUL(im,t1);
+ yi = S_MUL(im,t0) + S_MUL(re,t1);
+ yc.r = yr;
+ yc.i = yi;
+ yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift);
+ yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift);
+ f2[st->bitrev[i]] = yc;
}
}
- /* N/4 complex FFT, down-scales by 4/N */
- opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2);
+ /* N/4 complex FFT, does not downscale anymore */
+ opus_fft_impl(st, f2);
/* Post-rotate */
{
/* Temp pointers to make it really clear to the compiler what we're doing */
- const kiss_fft_scalar * OPUS_RESTRICT fp = f2;
+ const kiss_fft_cpx * OPUS_RESTRICT fp = f2;
kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
- const kiss_twiddle_scalar *t = &l->trig[0];
+ const kiss_twiddle_scalar *t = &trig[0];
/* Temp pointers to make it really clear to the compiler what we're doing */
for(i=0;i<N4;i++)
{
kiss_fft_scalar yr, yi;
- yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]);
- yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]);
- /* works because the cos is nearly one */
- *yp1 = yr - S_MUL(yi,sine);
- *yp2 = yi + S_MUL(yr,sine);;
- fp += 2;
+ yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
+ yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
+ *yp1 = yr;
+ *yp2 = yi;
+ fp++;
yp1 += 2*stride;
yp2 -= 2*stride;
}
}
RESTORE_STACK;
}
+#endif /* OVERRIDE_clt_mdct_forward */
#endif
+#ifndef OVERRIDE_clt_mdct_backward
void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride)
{
int i;
int N, N2, N4;
- kiss_twiddle_scalar sine;
-/* VARDECL(kiss_fft_scalar, f2);
- SAVE_STACK; */
+ const kiss_twiddle_scalar *trig;
+
N = l->n;
- N >>= shift;
+ trig = l->trig;
+ for (i=0;i<shift;i++)
+ {
+ N >>= 1;
+ trig += N;
+ }
N2 = N>>1;
N4 = N>>2;
-/* ALLOC(f2, N2, kiss_fft_scalar); */
- kiss_fft_scalar f2[N2]; /* worst case 3840b */
- /* sin(x) ~= x here */
-#ifdef FIXED_POINT
- sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
-#else
- sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;
-#endif
/* Pre-rotate */
{
/* Temp pointers to make it really clear to the compiler what we're doing */
const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
- kiss_fft_scalar * OPUS_RESTRICT yp = f2;
- const kiss_twiddle_scalar *t = &l->trig[0];
+ kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1);
+ const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
+ const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev;
for(i=0;i<N4;i++)
{
+ int rev;
kiss_fft_scalar yr, yi;
- yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]);
- yi = -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]);
- /* works because the cos is nearly one */
- *yp++ = yr - S_MUL(yi,sine);
- *yp++ = yi + S_MUL(yr,sine);
+ rev = *bitrev++;
+ yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]);
+ yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]);
+ /* We swap real and imag because we use an FFT instead of an IFFT. */
+ yp[2*rev+1] = yr;
+ yp[2*rev] = yi;
+ /* Storing the pre-rotation directly in the bitrev order. */
xp1+=2*stride;
xp2-=2*stride;
}
}
- /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */
- opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1)));
+ opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1)));
/* Post-rotate and de-shuffle from both ends of the buffer at once to make
it in-place. */
{
- kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1);
- kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2;
- const kiss_twiddle_scalar *t = &l->trig[0];
+ kiss_fft_scalar * yp0 = out+(overlap>>1);
+ kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2;
+ const kiss_twiddle_scalar *t = &trig[0];
/* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
middle pair will be computed twice. */
for(i=0;i<(N4+1)>>1;i++)
{
kiss_fft_scalar re, im, yr, yi;
kiss_twiddle_scalar t0, t1;
- re = yp0[0];
- im = yp0[1];
- t0 = t[i<<shift];
- t1 = t[(N4-i)<<shift];
+ /* We swap real and imag because we're using an FFT instead of an IFFT. */
+ re = yp0[1];
+ im = yp0[0];
+ t0 = t[i];
+ t1 = t[N4+i];
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
- yr = S_MUL(re,t0) - S_MUL(im,t1);
- yi = S_MUL(im,t0) + S_MUL(re,t1);
- re = yp1[0];
- im = yp1[1];
- /* works because the cos is nearly one */
- yp0[0] = -(yr - S_MUL(yi,sine));
- yp1[1] = yi + S_MUL(yr,sine);
+ yr = S_MUL(re,t0) + S_MUL(im,t1);
+ yi = S_MUL(re,t1) - S_MUL(im,t0);
+ /* We swap real and imag because we're using an FFT instead of an IFFT. */
+ re = yp1[1];
+ im = yp1[0];
+ yp0[0] = yr;
+ yp1[1] = yi;
- t0 = t[(N4-i-1)<<shift];
- t1 = t[(i+1)<<shift];
+ t0 = t[(N4-i-1)];
+ t1 = t[(N2-i-1)];
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
- yr = S_MUL(re,t0) - S_MUL(im,t1);
- yi = S_MUL(im,t0) + S_MUL(re,t1);
- /* works because the cos is nearly one */
- yp1[0] = -(yr - S_MUL(yi,sine));
- yp0[1] = yi + S_MUL(yr,sine);
+ yr = S_MUL(re,t0) + S_MUL(im,t1);
+ yi = S_MUL(re,t1) - S_MUL(im,t0);
+ yp1[0] = yr;
+ yp0[1] = yi;
yp0 += 2;
yp1 -= 2;
}
@@ -310,5 +339,5 @@
wp2--;
}
}
-/* RESTORE_STACK; */
}
+#endif /* OVERRIDE_clt_mdct_backward */
diff --git a/lib/rbcodec/codecs/libopus/celt/modes.h b/lib/rbcodec/codecs/libopus/celt/modes.h
index c8340f9..be813cc 100644
--- a/lib/rbcodec/codecs/libopus/celt/modes.h
+++ b/lib/rbcodec/codecs/libopus/celt/modes.h
@@ -39,14 +39,6 @@
#define MAX_PERIOD 1024
-#ifndef OVERLAP
-#define OVERLAP(mode) ((mode)->overlap)
-#endif
-
-#ifndef FRAMESIZE
-#define FRAMESIZE(mode) ((mode)->mdctSize)
-#endif
-
typedef struct {
int size;
const opus_int16 *index;
diff --git a/lib/rbcodec/codecs/libopus/celt/pitch.c b/lib/rbcodec/codecs/libopus/celt/pitch.c
index c288572..ee56a43 100644
--- a/lib/rbcodec/codecs/libopus/celt/pitch.c
+++ b/lib/rbcodec/codecs/libopus/celt/pitch.c
@@ -252,15 +252,15 @@
#endif
celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
{
- int i,j;
+ int i;
/*The EDSP version requires that max_pitch is at least 1, and that _x is
32-bit aligned.
Since it's hard to put asserts in assembly, put them here.*/
- celt_assert(max_pitch>0);
- celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
#ifdef FIXED_POINT
opus_val32 maxcorr=1;
#endif
+ celt_assert(max_pitch>0);
+ celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
for (i=0;i<max_pitch-3;i+=4)
{
opus_val32 sum[4]={0,0,0,0};
@@ -279,9 +279,8 @@
/* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
for (;i<max_pitch;i++)
{
- opus_val32 sum = 0;
- for (j=0;j<len;j++)
- sum = MAC16_16(sum, _x[j],_y[i+j]);
+ opus_val32 sum;
+ sum = celt_inner_prod(_x, _y+i, len);
xcorr[i] = sum;
#ifdef FIXED_POINT
maxcorr = MAX32(maxcorr, sum);
@@ -361,12 +360,17 @@
#endif
for (i=0;i<max_pitch>>1;i++)
{
- opus_val32 sum=0;
+ opus_val32 sum;
xcorr[i] = 0;
if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2)
continue;
+#ifdef FIXED_POINT
+ sum = 0;
for (j=0;j<len>>1;j++)
sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift);
+#else
+ sum = celt_inner_prod(x_lp, y+i, len>>1);
+#endif
xcorr[i] = MAX32(-1, sum);
#ifdef FIXED_POINT
maxcorr = MAX32(maxcorr, sum);
@@ -457,7 +461,7 @@
opus_val16 g1;
opus_val16 cont=0;
opus_val16 thresh;
- T1 = (2*T0+k)/(2*k);
+ T1 = celt_udiv(2*T0+k, 2*k);
if (T1 < minperiod)
break;
/* Look for another strong correlation at T1b */
@@ -469,7 +473,7 @@
T1b = T0+T1;
} else
{
- T1b = (2*second_check[k]*T0+k)/(2*k);
+ T1b = celt_udiv(2*second_check[k]*T0+k, 2*k);
}
dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2);
xy += xy2;
@@ -514,13 +518,7 @@
pg = SHR32(frac_div32(best_xy,best_yy+1),16);
for (k=0;k<3;k++)
- {
- int T1 = T+k-1;
- xy = 0;
- for (i=0;i<N;i++)
- xy = MAC16_16(xy, x[i], x[i-T1]);
- xcorr[k] = xy;
- }
+ xcorr[k] = celt_inner_prod(x, x-(T+k-1), N);
if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0]))
offset = 1;
else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2]))
diff --git a/lib/rbcodec/codecs/libopus/celt/pitch.h b/lib/rbcodec/codecs/libopus/celt/pitch.h
index df317ec..96dbc0d 100644
--- a/lib/rbcodec/codecs/libopus/celt/pitch.h
+++ b/lib/rbcodec/codecs/libopus/celt/pitch.h
@@ -41,8 +41,12 @@
#include "x86/pitch_sse.h"
#endif
+#if defined(MIPSr1_ASM)
+#include "mips/pitch_mipsr1.h"
+#endif
+
#if defined(OPUS_ARM_ASM) && defined(FIXED_POINT)
-# include "arm/pitch_arm.h"
+/* # include "arm/pitch_arm.h" -- commented out by this patch; NOTE(review): the reason is not visible here, confirm before re-enabling */
#endif
void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
@@ -141,6 +145,18 @@
}
#endif
+#ifndef OVERRIDE_CELT_INNER_PROD /* generic C version; not compiled when OVERRIDE_CELT_INNER_PROD is defined */
+static OPUS_INLINE opus_val32 celt_inner_prod(const opus_val16 *x, const opus_val16 *y, /* accumulates x[i] with y[i] for i in [0,N) and returns the total */
+ int N) /* N: number of elements read from each of x and y */
+{
+ int i;
+ opus_val32 xy=0; /* accumulator; returned as 0 when N<=0 because the loop body never runs */
+ for (i=0;i<N;i++)
+ xy = MAC16_16(xy, x[i], y[i]); /* MAC16_16 is defined outside this hunk; presumably xy + x[i]*y[i] -- confirm */
+ return xy;
+}
+#endif
+
#ifdef FIXED_POINT
opus_val32
#else
diff --git a/lib/rbcodec/codecs/libopus/celt/rate.c b/lib/rbcodec/codecs/libopus/celt/rate.c
index e13d839..f85c3ee 100644
--- a/lib/rbcodec/codecs/libopus/celt/rate.c
+++ b/lib/rbcodec/codecs/libopus/celt/rate.c
@@ -333,7 +333,7 @@
/*Figure out how many left-over bits we would be adding to this band.
This can include bits we've stolen back from higher, skipped bands.*/
left = total-psum;
- percoeff = left/(m->eBands[codedBands]-m->eBands[start]);
+ percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]);
left -= (m->eBands[codedBands]-m->eBands[start])*percoeff;
rem = IMAX(left-(m->eBands[j]-m->eBands[start]),0);
band_width = m->eBands[codedBands]-m->eBands[j];
@@ -414,7 +414,7 @@
/* Allocate the remaining bits */
left = total-psum;
- percoeff = left/(m->eBands[codedBands]-m->eBands[start]);
+ percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]);
left -= (m->eBands[codedBands]-m->eBands[start])*percoeff;
for (j=start;j<codedBands;j++)
bits[j] += ((int)percoeff*(m->eBands[j+1]-m->eBands[j]));
@@ -465,7 +465,8 @@
offset += NClogN>>3;
/* Divide with rounding */
- ebits[j] = IMAX(0, (bits[j] + offset + (den<<(BITRES-1))) / (den<<BITRES));
+ ebits[j] = IMAX(0, (bits[j] + offset + (den<<(BITRES-1))));
+ ebits[j] = celt_udiv(ebits[j], den)>>BITRES;
/* Make sure not to bust */
if (C*ebits[j] > (bits[j]>>BITRES))
diff --git a/lib/rbcodec/codecs/libopus/celt/stack_alloc.h b/lib/rbcodec/codecs/libopus/celt/stack_alloc.h
index 316a6ce..2b51c8d 100644
--- a/lib/rbcodec/codecs/libopus/celt/stack_alloc.h
+++ b/lib/rbcodec/codecs/libopus/celt/stack_alloc.h
@@ -116,9 +116,11 @@
#else
#ifdef CELT_C
+char *scratch_ptr=0;
char *global_stack=0;
#else
extern char *global_stack;
+extern char *scratch_ptr;
#endif /* CELT_C */
#ifdef ENABLE_VALGRIND
@@ -140,8 +142,12 @@
#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1))
#define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)/sizeof(char)),(stack)+=(size)*(sizeof(type)/sizeof(char)),(type*)((stack)-(size)*(sizeof(type)/sizeof(char))))
+#if 0 /* Set this to 1 to instrument pseudostack usage: every RESTORE_STACK then prints the offset of global_stack from scratch_ptr plus file:line */
+#define RESTORE_STACK (printf("%ld %s:%d\n", (long)(global_stack-scratch_ptr), __FILE__, __LINE__),global_stack = _saved_stack) /* pointer difference is ptrdiff_t; cast so it matches %ld */
+#else
#define RESTORE_STACK (global_stack = _saved_stack)
-#define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? opus_alloc_scratch(GLOBAL_STACK_SIZE) : global_stack); _saved_stack = global_stack;
+#endif
+#define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? (scratch_ptr=opus_alloc_scratch(GLOBAL_STACK_SIZE)) : global_stack); _saved_stack = global_stack;
#endif /* ENABLE_VALGRIND */
diff --git a/lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h b/lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h
index 92e5fe5..0396ce3 100644
--- a/lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h
+++ b/lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h
@@ -341,84 +341,84 @@
#ifndef FFT_BITREV480
#define FFT_BITREV480
static const opus_int16 fft_bitrev480[480] = {
-0, 120, 240, 360, 30, 150, 270, 390, 60, 180, 300, 420, 90, 210, 330,
-450, 15, 135, 255, 375, 45, 165, 285, 405, 75, 195, 315, 435, 105, 225,
-345, 465, 5, 125, 245, 365, 35, 155, 275, 395, 65, 185, 305, 425, 95,
-215, 335, 455, 20, 140, 260, 380, 50, 170, 290, 410, 80, 200, 320, 440,
-110, 230, 350, 470, 10, 130, 250, 370, 40, 160, 280, 400, 70, 190, 310,
-430, 100, 220, 340, 460, 25, 145, 265, 385, 55, 175, 295, 415, 85, 205,
-325, 445, 115, 235, 355, 475, 1, 121, 241, 361, 31, 151, 271, 391, 61,
-181, 301, 421, 91, 211, 331, 451, 16, 136, 256, 376, 46, 166, 286, 406,
-76, 196, 316, 436, 106, 226, 346, 466, 6, 126, 246, 366, 36, 156, 276,
-396, 66, 186, 306, 426, 96, 216, 336, 456, 21, 141, 261, 381, 51, 171,
-291, 411, 81, 201, 321, 441, 111, 231, 351, 471, 11, 131, 251, 371, 41,
-161, 281, 401, 71, 191, 311, 431, 101, 221, 341, 461, 26, 146, 266, 386,
-56, 176, 296, 416, 86, 206, 326, 446, 116, 236, 356, 476, 2, 122, 242,
-362, 32, 152, 272, 392, 62, 182, 302, 422, 92, 212, 332, 452, 17, 137,
-257, 377, 47, 167, 287, 407, 77, 197, 317, 437, 107, 227, 347, 467, 7,
-127, 247, 367, 37, 157, 277, 397, 67, 187, 307, 427, 97, 217, 337, 457,
-22, 142, 262, 382, 52, 172, 292, 412, 82, 202, 322, 442, 112, 232, 352,
-472, 12, 132, 252, 372, 42, 162, 282, 402, 72, 192, 312, 432, 102, 222,
-342, 462, 27, 147, 267, 387, 57, 177, 297, 417, 87, 207, 327, 447, 117,
-237, 357, 477, 3, 123, 243, 363, 33, 153, 273, 393, 63, 183, 303, 423,
-93, 213, 333, 453, 18, 138, 258, 378, 48, 168, 288, 408, 78, 198, 318,
-438, 108, 228, 348, 468, 8, 128, 248, 368, 38, 158, 278, 398, 68, 188,
-308, 428, 98, 218, 338, 458, 23, 143, 263, 383, 53, 173, 293, 413, 83,
-203, 323, 443, 113, 233, 353, 473, 13, 133, 253, 373, 43, 163, 283, 403,
-73, 193, 313, 433, 103, 223, 343, 463, 28, 148, 268, 388, 58, 178, 298,
-418, 88, 208, 328, 448, 118, 238, 358, 478, 4, 124, 244, 364, 34, 154,
-274, 394, 64, 184, 304, 424, 94, 214, 334, 454, 19, 139, 259, 379, 49,
-169, 289, 409, 79, 199, 319, 439, 109, 229, 349, 469, 9, 129, 249, 369,
-39, 159, 279, 399, 69, 189, 309, 429, 99, 219, 339, 459, 24, 144, 264,
-384, 54, 174, 294, 414, 84, 204, 324, 444, 114, 234, 354, 474, 14, 134,
-254, 374, 44, 164, 284, 404, 74, 194, 314, 434, 104, 224, 344, 464, 29,
-149, 269, 389, 59, 179, 299, 419, 89, 209, 329, 449, 119, 239, 359, 479,
+0, 96, 192, 288, 384, 32, 128, 224, 320, 416, 64, 160, 256, 352, 448,
+8, 104, 200, 296, 392, 40, 136, 232, 328, 424, 72, 168, 264, 360, 456,
+16, 112, 208, 304, 400, 48, 144, 240, 336, 432, 80, 176, 272, 368, 464,
+24, 120, 216, 312, 408, 56, 152, 248, 344, 440, 88, 184, 280, 376, 472,
+4, 100, 196, 292, 388, 36, 132, 228, 324, 420, 68, 164, 260, 356, 452,
+12, 108, 204, 300, 396, 44, 140, 236, 332, 428, 76, 172, 268, 364, 460,
+20, 116, 212, 308, 404, 52, 148, 244, 340, 436, 84, 180, 276, 372, 468,
+28, 124, 220, 316, 412, 60, 156, 252, 348, 444, 92, 188, 284, 380, 476,
+1, 97, 193, 289, 385, 33, 129, 225, 321, 417, 65, 161, 257, 353, 449,
+9, 105, 201, 297, 393, 41, 137, 233, 329, 425, 73, 169, 265, 361, 457,
+17, 113, 209, 305, 401, 49, 145, 241, 337, 433, 81, 177, 273, 369, 465,
+25, 121, 217, 313, 409, 57, 153, 249, 345, 441, 89, 185, 281, 377, 473,
+5, 101, 197, 293, 389, 37, 133, 229, 325, 421, 69, 165, 261, 357, 453,
+13, 109, 205, 301, 397, 45, 141, 237, 333, 429, 77, 173, 269, 365, 461,
+21, 117, 213, 309, 405, 53, 149, 245, 341, 437, 85, 181, 277, 373, 469,
+29, 125, 221, 317, 413, 61, 157, 253, 349, 445, 93, 189, 285, 381, 477,
+2, 98, 194, 290, 386, 34, 130, 226, 322, 418, 66, 162, 258, 354, 450,
+10, 106, 202, 298, 394, 42, 138, 234, 330, 426, 74, 170, 266, 362, 458,
+18, 114, 210, 306, 402, 50, 146, 242, 338, 434, 82, 178, 274, 370, 466,
+26, 122, 218, 314, 410, 58, 154, 250, 346, 442, 90, 186, 282, 378, 474,
+6, 102, 198, 294, 390, 38, 134, 230, 326, 422, 70, 166, 262, 358, 454,
+14, 110, 206, 302, 398, 46, 142, 238, 334, 430, 78, 174, 270, 366, 462,
+22, 118, 214, 310, 406, 54, 150, 246, 342, 438, 86, 182, 278, 374, 470,
+30, 126, 222, 318, 414, 62, 158, 254, 350, 446, 94, 190, 286, 382, 478,
+3, 99, 195, 291, 387, 35, 131, 227, 323, 419, 67, 163, 259, 355, 451,
+11, 107, 203, 299, 395, 43, 139, 235, 331, 427, 75, 171, 267, 363, 459,
+19, 115, 211, 307, 403, 51, 147, 243, 339, 435, 83, 179, 275, 371, 467,
+27, 123, 219, 315, 411, 59, 155, 251, 347, 443, 91, 187, 283, 379, 475,
+7, 103, 199, 295, 391, 39, 135, 231, 327, 423, 71, 167, 263, 359, 455,
+15, 111, 207, 303, 399, 47, 143, 239, 335, 431, 79, 175, 271, 367, 463,
+23, 119, 215, 311, 407, 55, 151, 247, 343, 439, 87, 183, 279, 375, 471,
+31, 127, 223, 319, 415, 63, 159, 255, 351, 447, 95, 191, 287, 383, 479,
};
#endif
#ifndef FFT_BITREV240
#define FFT_BITREV240
static const opus_int16 fft_bitrev240[240] = {
-0, 60, 120, 180, 15, 75, 135, 195, 30, 90, 150, 210, 45, 105, 165,
-225, 5, 65, 125, 185, 20, 80, 140, 200, 35, 95, 155, 215, 50, 110,
-170, 230, 10, 70, 130, 190, 25, 85, 145, 205, 40, 100, 160, 220, 55,
-115, 175, 235, 1, 61, 121, 181, 16, 76, 136, 196, 31, 91, 151, 211,
-46, 106, 166, 226, 6, 66, 126, 186, 21, 81, 141, 201, 36, 96, 156,
-216, 51, 111, 171, 231, 11, 71, 131, 191, 26, 86, 146, 206, 41, 101,
-161, 221, 56, 116, 176, 236, 2, 62, 122, 182, 17, 77, 137, 197, 32,
-92, 152, 212, 47, 107, 167, 227, 7, 67, 127, 187, 22, 82, 142, 202,
-37, 97, 157, 217, 52, 112, 172, 232, 12, 72, 132, 192, 27, 87, 147,
-207, 42, 102, 162, 222, 57, 117, 177, 237, 3, 63, 123, 183, 18, 78,
-138, 198, 33, 93, 153, 213, 48, 108, 168, 228, 8, 68, 128, 188, 23,
-83, 143, 203, 38, 98, 158, 218, 53, 113, 173, 233, 13, 73, 133, 193,
-28, 88, 148, 208, 43, 103, 163, 223, 58, 118, 178, 238, 4, 64, 124,
-184, 19, 79, 139, 199, 34, 94, 154, 214, 49, 109, 169, 229, 9, 69,
-129, 189, 24, 84, 144, 204, 39, 99, 159, 219, 54, 114, 174, 234, 14,
-74, 134, 194, 29, 89, 149, 209, 44, 104, 164, 224, 59, 119, 179, 239,
+0, 48, 96, 144, 192, 16, 64, 112, 160, 208, 32, 80, 128, 176, 224,
+4, 52, 100, 148, 196, 20, 68, 116, 164, 212, 36, 84, 132, 180, 228,
+8, 56, 104, 152, 200, 24, 72, 120, 168, 216, 40, 88, 136, 184, 232,
+12, 60, 108, 156, 204, 28, 76, 124, 172, 220, 44, 92, 140, 188, 236,
+1, 49, 97, 145, 193, 17, 65, 113, 161, 209, 33, 81, 129, 177, 225,
+5, 53, 101, 149, 197, 21, 69, 117, 165, 213, 37, 85, 133, 181, 229,
+9, 57, 105, 153, 201, 25, 73, 121, 169, 217, 41, 89, 137, 185, 233,
+13, 61, 109, 157, 205, 29, 77, 125, 173, 221, 45, 93, 141, 189, 237,
+2, 50, 98, 146, 194, 18, 66, 114, 162, 210, 34, 82, 130, 178, 226,
+6, 54, 102, 150, 198, 22, 70, 118, 166, 214, 38, 86, 134, 182, 230,
+10, 58, 106, 154, 202, 26, 74, 122, 170, 218, 42, 90, 138, 186, 234,
+14, 62, 110, 158, 206, 30, 78, 126, 174, 222, 46, 94, 142, 190, 238,
+3, 51, 99, 147, 195, 19, 67, 115, 163, 211, 35, 83, 131, 179, 227,
+7, 55, 103, 151, 199, 23, 71, 119, 167, 215, 39, 87, 135, 183, 231,
+11, 59, 107, 155, 203, 27, 75, 123, 171, 219, 43, 91, 139, 187, 235,
+15, 63, 111, 159, 207, 31, 79, 127, 175, 223, 47, 95, 143, 191, 239,
};
#endif
#ifndef FFT_BITREV120
#define FFT_BITREV120
static const opus_int16 fft_bitrev120[120] = {
-0, 30, 60, 90, 15, 45, 75, 105, 5, 35, 65, 95, 20, 50, 80,
-110, 10, 40, 70, 100, 25, 55, 85, 115, 1, 31, 61, 91, 16, 46,
-76, 106, 6, 36, 66, 96, 21, 51, 81, 111, 11, 41, 71, 101, 26,
-56, 86, 116, 2, 32, 62, 92, 17, 47, 77, 107, 7, 37, 67, 97,
-22, 52, 82, 112, 12, 42, 72, 102, 27, 57, 87, 117, 3, 33, 63,
-93, 18, 48, 78, 108, 8, 38, 68, 98, 23, 53, 83, 113, 13, 43,
-73, 103, 28, 58, 88, 118, 4, 34, 64, 94, 19, 49, 79, 109, 9,
-39, 69, 99, 24, 54, 84, 114, 14, 44, 74, 104, 29, 59, 89, 119,
+0, 24, 48, 72, 96, 8, 32, 56, 80, 104, 16, 40, 64, 88, 112,
+4, 28, 52, 76, 100, 12, 36, 60, 84, 108, 20, 44, 68, 92, 116,
+1, 25, 49, 73, 97, 9, 33, 57, 81, 105, 17, 41, 65, 89, 113,
+5, 29, 53, 77, 101, 13, 37, 61, 85, 109, 21, 45, 69, 93, 117,
+2, 26, 50, 74, 98, 10, 34, 58, 82, 106, 18, 42, 66, 90, 114,
+6, 30, 54, 78, 102, 14, 38, 62, 86, 110, 22, 46, 70, 94, 118,
+3, 27, 51, 75, 99, 11, 35, 59, 83, 107, 19, 43, 67, 91, 115,
+7, 31, 55, 79, 103, 15, 39, 63, 87, 111, 23, 47, 71, 95, 119,
};
#endif
#ifndef FFT_BITREV60
#define FFT_BITREV60
static const opus_int16 fft_bitrev60[60] = {
-0, 15, 30, 45, 5, 20, 35, 50, 10, 25, 40, 55, 1, 16, 31,
-46, 6, 21, 36, 51, 11, 26, 41, 56, 2, 17, 32, 47, 7, 22,
-37, 52, 12, 27, 42, 57, 3, 18, 33, 48, 8, 23, 38, 53, 13,
-28, 43, 58, 4, 19, 34, 49, 9, 24, 39, 54, 14, 29, 44, 59,
+0, 12, 24, 36, 48, 4, 16, 28, 40, 52, 8, 20, 32, 44, 56,
+1, 13, 25, 37, 49, 5, 17, 29, 41, 53, 9, 21, 33, 45, 57,
+2, 14, 26, 38, 50, 6, 18, 30, 42, 54, 10, 22, 34, 46, 58,
+3, 15, 27, 39, 51, 7, 19, 31, 43, 55, 11, 23, 35, 47, 59,
};
#endif
@@ -426,8 +426,10 @@
#define FFT_STATE48000_960_0
static const kiss_fft_state fft_state48000_960_0 ICONST_ATTR = {
480, /* nfft */
+17476, /* scale */
+8, /* scale_shift */
-1, /* shift */
-{4, 120, 4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, }, /* factors */
+{5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev480, /* bitrev */
fft_twiddles48000_960, /* bitrev */
};
@@ -437,8 +439,10 @@
#define FFT_STATE48000_960_1
static const kiss_fft_state fft_state48000_960_1 ICONST_ATTR = {
240, /* nfft */
+17476, /* scale */
+7, /* scale_shift */
1, /* shift */
-{4, 60, 4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
+{5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev240, /* bitrev */
fft_twiddles48000_960, /* bitrev */
};
@@ -448,8 +452,10 @@
#define FFT_STATE48000_960_2
static const kiss_fft_state fft_state48000_960_2 ICONST_ATTR = {
120, /* nfft */
+17476, /* scale */
+6, /* scale_shift */
2, /* shift */
-{4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
+{5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev120, /* bitrev */
fft_twiddles48000_960, /* bitrev */
};
@@ -459,8 +465,10 @@
#define FFT_STATE48000_960_3
static const kiss_fft_state fft_state48000_960_3 ICONST_ATTR = {
60, /* nfft */
+17476, /* scale */
+5, /* scale_shift */
3, /* shift */
-{4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
+{5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev60, /* bitrev */
fft_twiddles48000_960, /* bitrev */
};
@@ -470,104 +478,368 @@
#ifndef MDCT_TWIDDLES960
#define MDCT_TWIDDLES960
-static const opus_val16 mdct_twiddles960[481] ICONST_ATTR = {
-32767, 32767, 32767, 32767, 32766,
-32763, 32762, 32759, 32757, 32753,
-32751, 32747, 32743, 32738, 32733,
-32729, 32724, 32717, 32711, 32705,
-32698, 32690, 32683, 32676, 32667,
-32658, 32650, 32640, 32631, 32620,
-32610, 32599, 32588, 32577, 32566,
-32554, 32541, 32528, 32515, 32502,
-32487, 32474, 32459, 32444, 32429,
-32413, 32397, 32381, 32364, 32348,
-32331, 32313, 32294, 32277, 32257,
-32239, 32219, 32200, 32180, 32159,
-32138, 32118, 32096, 32074, 32051,
-32029, 32006, 31984, 31960, 31936,
-31912, 31888, 31863, 31837, 31812,
-31786, 31760, 31734, 31707, 31679,
-31652, 31624, 31596, 31567, 31539,
-31508, 31479, 31450, 31419, 31388,
-31357, 31326, 31294, 31262, 31230,
-31198, 31164, 31131, 31097, 31063,
-31030, 30994, 30959, 30924, 30889,
-30853, 30816, 30779, 30743, 30705,
-30668, 30629, 30592, 30553, 30515,
-30475, 30435, 30396, 30356, 30315,
-30274, 30233, 30191, 30149, 30107,
-30065, 30022, 29979, 29936, 29891,
-29847, 29803, 29758, 29713, 29668,
-29622, 29577, 29529, 29483, 29436,
-29390, 29341, 29293, 29246, 29197,
-29148, 29098, 29050, 29000, 28949,
-28899, 28848, 28797, 28746, 28694,
-28642, 28590, 28537, 28485, 28432,
-28378, 28324, 28271, 28217, 28162,
-28106, 28051, 27995, 27940, 27884,
-27827, 27770, 27713, 27657, 27598,
-27540, 27481, 27423, 27365, 27305,
-27246, 27187, 27126, 27066, 27006,
-26945, 26883, 26822, 26760, 26698,
-26636, 26574, 26510, 26448, 26383,
-26320, 26257, 26191, 26127, 26062,
-25997, 25931, 25866, 25800, 25734,
-25667, 25601, 25533, 25466, 25398,
-25330, 25262, 25194, 25125, 25056,
-24987, 24917, 24848, 24778, 24707,
-24636, 24566, 24495, 24424, 24352,
-24280, 24208, 24135, 24063, 23990,
-23917, 23842, 23769, 23695, 23622,
-23546, 23472, 23398, 23322, 23246,
-23171, 23095, 23018, 22942, 22866,
-22788, 22711, 22634, 22557, 22478,
-22400, 22322, 22244, 22165, 22085,
-22006, 21927, 21846, 21766, 21687,
-21606, 21524, 21443, 21363, 21282,
-21199, 21118, 21035, 20954, 20870,
-20788, 20705, 20621, 20538, 20455,
-20371, 20286, 20202, 20118, 20034,
-19947, 19863, 19777, 19692, 19606,
-19520, 19434, 19347, 19260, 19174,
-19088, 18999, 18911, 18825, 18737,
-18648, 18560, 18472, 18384, 18294,
-18205, 18116, 18025, 17936, 17846,
-17757, 17666, 17576, 17485, 17395,
-17303, 17212, 17122, 17030, 16937,
-16846, 16755, 16662, 16569, 16477,
-16385, 16291, 16198, 16105, 16012,
-15917, 15824, 15730, 15636, 15541,
-15447, 15352, 15257, 15162, 15067,
-14973, 14875, 14781, 14685, 14589,
-14493, 14396, 14300, 14204, 14107,
-14010, 13914, 13815, 13718, 13621,
-13524, 13425, 13328, 13230, 13133,
-13033, 12935, 12836, 12738, 12638,
-12540, 12441, 12341, 12241, 12142,
-12044, 11943, 11843, 11744, 11643,
-11542, 11442, 11342, 11241, 11139,
-11039, 10939, 10836, 10736, 10635,
-10534, 10431, 10330, 10228, 10127,
-10024, 9921, 9820, 9718, 9614,
-9512, 9410, 9306, 9204, 9101,
-8998, 8895, 8791, 8689, 8585,
-8481, 8377, 8274, 8171, 8067,
-7962, 7858, 7753, 7650, 7545,
-7441, 7336, 7231, 7129, 7023,
-6917, 6813, 6709, 6604, 6498,
-6393, 6288, 6182, 6077, 5973,
-5867, 5760, 5656, 5549, 5445,
-5339, 5232, 5127, 5022, 4914,
-4809, 4703, 4596, 4490, 4384,
-4278, 4171, 4065, 3958, 3852,
-3745, 3640, 3532, 3426, 3318,
-3212, 3106, 2998, 2891, 2786,
-2679, 2570, 2465, 2358, 2251,
-2143, 2037, 1929, 1823, 1715,
-1609, 1501, 1393, 1287, 1180,
-1073, 964, 858, 751, 644,
-535, 429, 322, 214, 107,
-0, };
+static const opus_val16 mdct_twiddles960[1800] ICONST_ATTR = {
+32767, 32767, 32767, 32766, 32765,
+32763, 32761, 32759, 32756, 32753,
+32750, 32746, 32742, 32738, 32733,
+32728, 32722, 32717, 32710, 32704,
+32697, 32690, 32682, 32674, 32666,
+32657, 32648, 32639, 32629, 32619,
+32609, 32598, 32587, 32576, 32564,
+32552, 32539, 32526, 32513, 32500,
+32486, 32472, 32457, 32442, 32427,
+32411, 32395, 32379, 32362, 32345,
+32328, 32310, 32292, 32274, 32255,
+32236, 32217, 32197, 32177, 32157,
+32136, 32115, 32093, 32071, 32049,
+32027, 32004, 31981, 31957, 31933,
+31909, 31884, 31859, 31834, 31809,
+31783, 31756, 31730, 31703, 31676,
+31648, 31620, 31592, 31563, 31534,
+31505, 31475, 31445, 31415, 31384,
+31353, 31322, 31290, 31258, 31226,
+31193, 31160, 31127, 31093, 31059,
+31025, 30990, 30955, 30920, 30884,
+30848, 30812, 30775, 30738, 30701,
+30663, 30625, 30587, 30548, 30509,
+30470, 30430, 30390, 30350, 30309,
+30269, 30227, 30186, 30144, 30102,
+30059, 30016, 29973, 29930, 29886,
+29842, 29797, 29752, 29707, 29662,
+29616, 29570, 29524, 29477, 29430,
+29383, 29335, 29287, 29239, 29190,
+29142, 29092, 29043, 28993, 28943,
+28892, 28842, 28791, 28739, 28688,
+28636, 28583, 28531, 28478, 28425,
+28371, 28317, 28263, 28209, 28154,
+28099, 28044, 27988, 27932, 27876,
+27820, 27763, 27706, 27648, 27591,
+27533, 27474, 27416, 27357, 27298,
+27238, 27178, 27118, 27058, 26997,
+26936, 26875, 26814, 26752, 26690,
+26628, 26565, 26502, 26439, 26375,
+26312, 26247, 26183, 26119, 26054,
+25988, 25923, 25857, 25791, 25725,
+25658, 25592, 25524, 25457, 25389,
+25322, 25253, 25185, 25116, 25047,
+24978, 24908, 24838, 24768, 24698,
+24627, 24557, 24485, 24414, 24342,
+24270, 24198, 24126, 24053, 23980,
+23907, 23834, 23760, 23686, 23612,
+23537, 23462, 23387, 23312, 23237,
+23161, 23085, 23009, 22932, 22856,
+22779, 22701, 22624, 22546, 22468,
+22390, 22312, 22233, 22154, 22075,
+21996, 21916, 21836, 21756, 21676,
+21595, 21515, 21434, 21352, 21271,
+21189, 21107, 21025, 20943, 20860,
+20777, 20694, 20611, 20528, 20444,
+20360, 20276, 20192, 20107, 20022,
+19937, 19852, 19767, 19681, 19595,
+19509, 19423, 19336, 19250, 19163,
+19076, 18988, 18901, 18813, 18725,
+18637, 18549, 18460, 18372, 18283,
+18194, 18104, 18015, 17925, 17835,
+17745, 17655, 17565, 17474, 17383,
+17292, 17201, 17110, 17018, 16927,
+16835, 16743, 16650, 16558, 16465,
+16372, 16279, 16186, 16093, 15999,
+15906, 15812, 15718, 15624, 15529,
+15435, 15340, 15245, 15150, 15055,
+14960, 14864, 14769, 14673, 14577,
+14481, 14385, 14288, 14192, 14095,
+13998, 13901, 13804, 13706, 13609,
+13511, 13414, 13316, 13218, 13119,
+13021, 12923, 12824, 12725, 12626,
+12527, 12428, 12329, 12230, 12130,
+12030, 11930, 11831, 11730, 11630,
+11530, 11430, 11329, 11228, 11128,
+11027, 10926, 10824, 10723, 10622,
+10520, 10419, 10317, 10215, 10113,
+10011, 9909, 9807, 9704, 9602,
+9499, 9397, 9294, 9191, 9088,
+8985, 8882, 8778, 8675, 8572,
+8468, 8364, 8261, 8157, 8053,
+7949, 7845, 7741, 7637, 7532,
+7428, 7323, 7219, 7114, 7009,
+6905, 6800, 6695, 6590, 6485,
+6380, 6274, 6169, 6064, 5958,
+5853, 5747, 5642, 5536, 5430,
+5325, 5219, 5113, 5007, 4901,
+4795, 4689, 4583, 4476, 4370,
+4264, 4157, 4051, 3945, 3838,
+3732, 3625, 3518, 3412, 3305,
+3198, 3092, 2985, 2878, 2771,
+2664, 2558, 2451, 2344, 2237,
+2130, 2023, 1916, 1809, 1702,
+1594, 1487, 1380, 1273, 1166,
+1059, 952, 844, 737, 630,
+523, 416, 308, 201, 94,
+-13, -121, -228, -335, -442,
+-550, -657, -764, -871, -978,
+-1086, -1193, -1300, -1407, -1514,
+-1621, -1728, -1835, -1942, -2049,
+-2157, -2263, -2370, -2477, -2584,
+-2691, -2798, -2905, -3012, -3118,
+-3225, -3332, -3439, -3545, -3652,
+-3758, -3865, -3971, -4078, -4184,
+-4290, -4397, -4503, -4609, -4715,
+-4821, -4927, -5033, -5139, -5245,
+-5351, -5457, -5562, -5668, -5774,
+-5879, -5985, -6090, -6195, -6301,
+-6406, -6511, -6616, -6721, -6826,
+-6931, -7036, -7140, -7245, -7349,
+-7454, -7558, -7663, -7767, -7871,
+-7975, -8079, -8183, -8287, -8390,
+-8494, -8597, -8701, -8804, -8907,
+-9011, -9114, -9217, -9319, -9422,
+-9525, -9627, -9730, -9832, -9934,
+-10037, -10139, -10241, -10342, -10444,
+-10546, -10647, -10748, -10850, -10951,
+-11052, -11153, -11253, -11354, -11455,
+-11555, -11655, -11756, -11856, -11955,
+-12055, -12155, -12254, -12354, -12453,
+-12552, -12651, -12750, -12849, -12947,
+-13046, -13144, -13242, -13340, -13438,
+-13536, -13633, -13731, -13828, -13925,
+-14022, -14119, -14216, -14312, -14409,
+-14505, -14601, -14697, -14793, -14888,
+-14984, -15079, -15174, -15269, -15364,
+-15459, -15553, -15647, -15741, -15835,
+-15929, -16023, -16116, -16210, -16303,
+-16396, -16488, -16581, -16673, -16766,
+-16858, -16949, -17041, -17133, -17224,
+-17315, -17406, -17497, -17587, -17678,
+-17768, -17858, -17948, -18037, -18127,
+-18216, -18305, -18394, -18483, -18571,
+-18659, -18747, -18835, -18923, -19010,
+-19098, -19185, -19271, -19358, -19444,
+-19531, -19617, -19702, -19788, -19873,
+-19959, -20043, -20128, -20213, -20297,
+-20381, -20465, -20549, -20632, -20715,
+-20798, -20881, -20963, -21046, -21128,
+-21210, -21291, -21373, -21454, -21535,
+-21616, -21696, -21776, -21856, -21936,
+-22016, -22095, -22174, -22253, -22331,
+-22410, -22488, -22566, -22643, -22721,
+-22798, -22875, -22951, -23028, -23104,
+-23180, -23256, -23331, -23406, -23481,
+-23556, -23630, -23704, -23778, -23852,
+-23925, -23998, -24071, -24144, -24216,
+-24288, -24360, -24432, -24503, -24574,
+-24645, -24716, -24786, -24856, -24926,
+-24995, -25064, -25133, -25202, -25270,
+-25339, -25406, -25474, -25541, -25608,
+-25675, -25742, -25808, -25874, -25939,
+-26005, -26070, -26135, -26199, -26264,
+-26327, -26391, -26455, -26518, -26581,
+-26643, -26705, -26767, -26829, -26891,
+-26952, -27013, -27073, -27133, -27193,
+-27253, -27312, -27372, -27430, -27489,
+-27547, -27605, -27663, -27720, -27777,
+-27834, -27890, -27946, -28002, -28058,
+-28113, -28168, -28223, -28277, -28331,
+-28385, -28438, -28491, -28544, -28596,
+-28649, -28701, -28752, -28803, -28854,
+-28905, -28955, -29006, -29055, -29105,
+-29154, -29203, -29251, -29299, -29347,
+-29395, -29442, -29489, -29535, -29582,
+-29628, -29673, -29719, -29764, -29808,
+-29853, -29897, -29941, -29984, -30027,
+-30070, -30112, -30154, -30196, -30238,
+-30279, -30320, -30360, -30400, -30440,
+-30480, -30519, -30558, -30596, -30635,
+-30672, -30710, -30747, -30784, -30821,
+-30857, -30893, -30929, -30964, -30999,
+-31033, -31068, -31102, -31135, -31168,
+-31201, -31234, -31266, -31298, -31330,
+-31361, -31392, -31422, -31453, -31483,
+-31512, -31541, -31570, -31599, -31627,
+-31655, -31682, -31710, -31737, -31763,
+-31789, -31815, -31841, -31866, -31891,
+-31915, -31939, -31963, -31986, -32010,
+-32032, -32055, -32077, -32099, -32120,
+-32141, -32162, -32182, -32202, -32222,
+-32241, -32260, -32279, -32297, -32315,
+-32333, -32350, -32367, -32383, -32399,
+-32415, -32431, -32446, -32461, -32475,
+-32489, -32503, -32517, -32530, -32542,
+-32555, -32567, -32579, -32590, -32601,
+-32612, -32622, -32632, -32641, -32651,
+-32659, -32668, -32676, -32684, -32692,
+-32699, -32706, -32712, -32718, -32724,
+-32729, -32734, -32739, -32743, -32747,
+-32751, -32754, -32757, -32760, -32762,
+-32764, -32765, -32767, -32767, -32767,
+32767, 32767, 32765, 32761, 32756,
+32750, 32742, 32732, 32722, 32710,
+32696, 32681, 32665, 32647, 32628,
+32608, 32586, 32562, 32538, 32512,
+32484, 32455, 32425, 32393, 32360,
+32326, 32290, 32253, 32214, 32174,
+32133, 32090, 32046, 32001, 31954,
+31906, 31856, 31805, 31753, 31700,
+31645, 31588, 31530, 31471, 31411,
+31349, 31286, 31222, 31156, 31089,
+31020, 30951, 30880, 30807, 30733,
+30658, 30582, 30504, 30425, 30345,
+30263, 30181, 30096, 30011, 29924,
+29836, 29747, 29656, 29564, 29471,
+29377, 29281, 29184, 29086, 28987,
+28886, 28784, 28681, 28577, 28471,
+28365, 28257, 28147, 28037, 27925,
+27812, 27698, 27583, 27467, 27349,
+27231, 27111, 26990, 26868, 26744,
+26620, 26494, 26367, 26239, 26110,
+25980, 25849, 25717, 25583, 25449,
+25313, 25176, 25038, 24900, 24760,
+24619, 24477, 24333, 24189, 24044,
+23898, 23751, 23602, 23453, 23303,
+23152, 22999, 22846, 22692, 22537,
+22380, 22223, 22065, 21906, 21746,
+21585, 21423, 21261, 21097, 20933,
+20767, 20601, 20434, 20265, 20096,
+19927, 19756, 19584, 19412, 19239,
+19065, 18890, 18714, 18538, 18361,
+18183, 18004, 17824, 17644, 17463,
+17281, 17098, 16915, 16731, 16546,
+16361, 16175, 15988, 15800, 15612,
+15423, 15234, 15043, 14852, 14661,
+14469, 14276, 14083, 13889, 13694,
+13499, 13303, 13107, 12910, 12713,
+12515, 12317, 12118, 11918, 11718,
+11517, 11316, 11115, 10913, 10710,
+10508, 10304, 10100, 9896, 9691,
+9486, 9281, 9075, 8869, 8662,
+8455, 8248, 8040, 7832, 7623,
+7415, 7206, 6996, 6787, 6577,
+6366, 6156, 5945, 5734, 5523,
+5311, 5100, 4888, 4675, 4463,
+4251, 4038, 3825, 3612, 3399,
+3185, 2972, 2758, 2544, 2330,
+2116, 1902, 1688, 1474, 1260,
+1045, 831, 617, 402, 188,
+-27, -241, -456, -670, -885,
+-1099, -1313, -1528, -1742, -1956,
+-2170, -2384, -2598, -2811, -3025,
+-3239, -3452, -3665, -3878, -4091,
+-4304, -4516, -4728, -4941, -5153,
+-5364, -5576, -5787, -5998, -6209,
+-6419, -6629, -6839, -7049, -7258,
+-7467, -7676, -7884, -8092, -8300,
+-8507, -8714, -8920, -9127, -9332,
+-9538, -9743, -9947, -10151, -10355,
+-10558, -10761, -10963, -11165, -11367,
+-11568, -11768, -11968, -12167, -12366,
+-12565, -12762, -12960, -13156, -13352,
+-13548, -13743, -13937, -14131, -14324,
+-14517, -14709, -14900, -15091, -15281,
+-15470, -15659, -15847, -16035, -16221,
+-16407, -16593, -16777, -16961, -17144,
+-17326, -17508, -17689, -17869, -18049,
+-18227, -18405, -18582, -18758, -18934,
+-19108, -19282, -19455, -19627, -19799,
+-19969, -20139, -20308, -20475, -20642,
+-20809, -20974, -21138, -21301, -21464,
+-21626, -21786, -21946, -22105, -22263,
+-22420, -22575, -22730, -22884, -23037,
+-23189, -23340, -23490, -23640, -23788,
+-23935, -24080, -24225, -24369, -24512,
+-24654, -24795, -24934, -25073, -25211,
+-25347, -25482, -25617, -25750, -25882,
+-26013, -26143, -26272, -26399, -26526,
+-26651, -26775, -26898, -27020, -27141,
+-27260, -27379, -27496, -27612, -27727,
+-27841, -27953, -28065, -28175, -28284,
+-28391, -28498, -28603, -28707, -28810,
+-28911, -29012, -29111, -29209, -29305,
+-29401, -29495, -29587, -29679, -29769,
+-29858, -29946, -30032, -30118, -30201,
+-30284, -30365, -30445, -30524, -30601,
+-30677, -30752, -30825, -30897, -30968,
+-31038, -31106, -31172, -31238, -31302,
+-31365, -31426, -31486, -31545, -31602,
+-31658, -31713, -31766, -31818, -31869,
+-31918, -31966, -32012, -32058, -32101,
+-32144, -32185, -32224, -32262, -32299,
+-32335, -32369, -32401, -32433, -32463,
+-32491, -32518, -32544, -32568, -32591,
+-32613, -32633, -32652, -32669, -32685,
+-32700, -32713, -32724, -32735, -32744,
+-32751, -32757, -32762, -32766, -32767,
+32767, 32764, 32755, 32741, 32720,
+32694, 32663, 32626, 32583, 32535,
+32481, 32421, 32356, 32286, 32209,
+32128, 32041, 31948, 31850, 31747,
+31638, 31523, 31403, 31278, 31148,
+31012, 30871, 30724, 30572, 30415,
+30253, 30086, 29913, 29736, 29553,
+29365, 29172, 28974, 28771, 28564,
+28351, 28134, 27911, 27684, 27452,
+27216, 26975, 26729, 26478, 26223,
+25964, 25700, 25432, 25159, 24882,
+24601, 24315, 24026, 23732, 23434,
+23133, 22827, 22517, 22204, 21886,
+21565, 21240, 20912, 20580, 20244,
+19905, 19563, 19217, 18868, 18516,
+18160, 17802, 17440, 17075, 16708,
+16338, 15964, 15588, 15210, 14829,
+14445, 14059, 13670, 13279, 12886,
+12490, 12093, 11693, 11291, 10888,
+10482, 10075, 9666, 9255, 8843,
+8429, 8014, 7597, 7180, 6760,
+6340, 5919, 5496, 5073, 4649,
+4224, 3798, 3372, 2945, 2517,
+2090, 1661, 1233, 804, 375,
+-54, -483, -911, -1340, -1768,
+-2197, -2624, -3052, -3479, -3905,
+-4330, -4755, -5179, -5602, -6024,
+-6445, -6865, -7284, -7702, -8118,
+-8533, -8946, -9358, -9768, -10177,
+-10584, -10989, -11392, -11793, -12192,
+-12589, -12984, -13377, -13767, -14155,
+-14541, -14924, -15305, -15683, -16058,
+-16430, -16800, -17167, -17531, -17892,
+-18249, -18604, -18956, -19304, -19649,
+-19990, -20329, -20663, -20994, -21322,
+-21646, -21966, -22282, -22595, -22904,
+-23208, -23509, -23806, -24099, -24387,
+-24672, -24952, -25228, -25499, -25766,
+-26029, -26288, -26541, -26791, -27035,
+-27275, -27511, -27741, -27967, -28188,
+-28405, -28616, -28823, -29024, -29221,
+-29412, -29599, -29780, -29957, -30128,
+-30294, -30455, -30611, -30761, -30906,
+-31046, -31181, -31310, -31434, -31552,
+-31665, -31773, -31875, -31972, -32063,
+-32149, -32229, -32304, -32373, -32437,
+-32495, -32547, -32594, -32635, -32671,
+-32701, -32726, -32745, -32758, -32766,
+32767, 32754, 32717, 32658, 32577,
+32473, 32348, 32200, 32029, 31837,
+31624, 31388, 31131, 30853, 30553,
+30232, 29891, 29530, 29148, 28746,
+28324, 27883, 27423, 26944, 26447,
+25931, 25398, 24847, 24279, 23695,
+23095, 22478, 21846, 21199, 20538,
+19863, 19174, 18472, 17757, 17030,
+16291, 15541, 14781, 14010, 13230,
+12441, 11643, 10837, 10024, 9204,
+8377, 7545, 6708, 5866, 5020,
+4171, 3319, 2464, 1608, 751,
+-107, -965, -1822, -2678, -3532,
+-4383, -5232, -6077, -6918, -7754,
+-8585, -9409, -10228, -11039, -11843,
+-12639, -13426, -14204, -14972, -15730,
+-16477, -17213, -17937, -18648, -19347,
+-20033, -20705, -21363, -22006, -22634,
+-23246, -23843, -24423, -24986, -25533,
+-26062, -26573, -27066, -27540, -27995,
+-28431, -28848, -29245, -29622, -29979,
+-30315, -30630, -30924, -31197, -31449,
+-31679, -31887, -32074, -32239, -32381,
+-32501, -32600, -32675, -32729, -32759,
+};
#endif
static const CELTMode mode48000_960_120 ICONST_ATTR = {
diff --git a/lib/rbcodec/codecs/libopus/celt/vq.c b/lib/rbcodec/codecs/libopus/celt/vq.c
index af991bb..b047b22 100644
--- a/lib/rbcodec/codecs/libopus/celt/vq.c
+++ b/lib/rbcodec/codecs/libopus/celt/vq.c
@@ -37,19 +37,27 @@
#include "os_support.h"
#include "bands.h"
#include "rate.h"
+#include "pitch.h"
+#if defined(MIPSr1_ASM)
+#include "mips/vq_mipsr1.h"
+#endif
+
+#ifndef OVERRIDE_vq_exp_rotation1
static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
{
int i;
+ opus_val16 ms;
celt_norm *Xptr;
Xptr = X;
+ ms = NEG16(s);
for (i=0;i<len-stride;i++)
{
celt_norm x1, x2;
x1 = Xptr[0];
x2 = Xptr[stride];
- Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
- *Xptr++ = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
+ Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
+ *Xptr++ = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
}
Xptr = &X[len-2*stride-1];
for (i=len-2*stride-1;i>=0;i--)
@@ -57,10 +65,11 @@
celt_norm x1, x2;
x1 = Xptr[0];
x2 = Xptr[stride];
- Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
- *Xptr-- = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
+ Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
+ *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
}
}
+#endif /* OVERRIDE_vq_exp_rotation1 */
static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
{
@@ -91,7 +100,7 @@
}
/*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
extract_collapse_mask().*/
- len /= stride;
+ len = celt_udiv(len, stride);
for (i=0;i<stride;i++)
{
if (dir < 0)
@@ -140,13 +149,15 @@
return 1;
/*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
exp_rotation().*/
- N0 = N/B;
+ N0 = celt_udiv(N, B);
collapse_mask = 0;
i=0; do {
int j;
+ unsigned tmp=0;
j=0; do {
- collapse_mask |= (iy[i*N0+j]!=0)<<i;
+ tmp |= iy[i*N0+j];
} while (++j<N0);
+ collapse_mask |= (tmp!=0)<<i;
} while (++i<B);
return collapse_mask;
}
@@ -322,47 +333,34 @@
unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
ec_dec *dec, opus_val16 gain)
{
- int i;
opus_val32 Ryy;
unsigned collapse_mask;
-/* VARDECL(int, iy);
- SAVE_STACK; */
-
- /* the difference between the last two values of eband5ms shifted by maxLM
- which is 22 << 3 with the static mode */
- int iy[176];
+ VARDECL(int, iy); /* decoded pulse vector; sized by the ALLOC below instead of the old fixed int[176] */
+ SAVE_STACK;
celt_assert2(K>0, "alg_unquant() needs at least one pulse");
celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
-/* ALLOC(iy, N, int); */
- decode_pulses(iy, N, K, dec);
- Ryy = 0;
- i=0;
- do {
- Ryy = MAC16_16(Ryy, iy[i], iy[i]);
- } while (++i < N);
+ ALLOC(iy, N, int); /* exactly N entries, the length passed to every consumer of iy below */
+ Ryy = decode_pulses(iy, N, K, dec); /* NOTE(review): presumably returns the sum of iy[i]*iy[i] that the removed loop accumulated here -- confirm against decode_pulses() */
normalise_residual(iy, X, N, Ryy, gain);
exp_rotation(X, N, -1, B, K, spread);
collapse_mask = extract_collapse_mask(iy, N, B);
-/* RESTORE_STACK; */
+ RESTORE_STACK;
return collapse_mask;
}
+#ifndef OVERRIDE_renormalise_vector
void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
{
int i;
#ifdef FIXED_POINT
int k;
#endif
- opus_val32 E = EPSILON;
+ opus_val32 E;
opus_val16 g;
opus_val32 t;
- celt_norm *xptr = X;
- for (i=0;i<N;i++)
- {
- E = MAC16_16(E, *xptr, *xptr);
- xptr++;
- }
+ celt_norm *xptr;
+ E = EPSILON + celt_inner_prod(X, X, N);
#ifdef FIXED_POINT
k = celt_ilog2(E)>>1;
#endif
@@ -377,8 +375,9 @@
}
/*return celt_sqrt(E);*/
}
+#endif /* OVERRIDE_renormalise_vector */
-int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N)
+int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N)
{
int i;
int itheta;
@@ -397,14 +396,8 @@
Eside = MAC16_16(Eside, s, s);
}
} else {
- for (i=0;i<N;i++)
- {
- celt_norm m, s;
- m = X[i];
- s = Y[i];
- Emid = MAC16_16(Emid, m, m);
- Eside = MAC16_16(Eside, s, s);
- }
+ Emid += celt_inner_prod(X, X, N);
+ Eside += celt_inner_prod(Y, Y, N);
}
mid = celt_sqrt(Emid);
side = celt_sqrt(Eside);
diff --git a/lib/rbcodec/codecs/libopus/celt/vq.h b/lib/rbcodec/codecs/libopus/celt/vq.h
index ffdc69c..84115cb 100644
--- a/lib/rbcodec/codecs/libopus/celt/vq.h
+++ b/lib/rbcodec/codecs/libopus/celt/vq.h
@@ -65,6 +65,6 @@
void renormalise_vector(celt_norm *X, int N, opus_val16 gain);
-int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N);
+int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N);
#endif /* VQ_H */
diff --git a/lib/rbcodec/codecs/libopus/config.h b/lib/rbcodec/codecs/libopus/config.h
index 1ce75ff..f9fdfb0 100644
--- a/lib/rbcodec/codecs/libopus/config.h
+++ b/lib/rbcodec/codecs/libopus/config.h
@@ -9,7 +9,8 @@
#define OPUS_BUILD
/* alloc stuff */
-#define NONTHREADSAFE_PSEUDOSTACK
+#define VAR_ARRAYS /* replaces NONTHREADSAFE_PSEUDOSTACK: codec temporaries now come from the real stack */
+#define NORM_ALIASING_HACK /* NOTE(review): presumably one of the stack-saving hacks named in the commit message -- confirm where it is tested */
#define OVERRIDE_OPUS_ALLOC
#define OVERRIDE_OPUS_FREE
@@ -40,6 +41,7 @@
#endif
#if defined(CPU_ARM)
+#define OPUS_ARM_ASM
#if ARM_ARCH == 4
#define OPUS_ARM_INLINE_ASM
#elif ARM_ARCH > 4
diff --git a/lib/rbcodec/codecs/libopus/opus.c b/lib/rbcodec/codecs/libopus/opus.c
index 989c653..0526f8b 100644
--- a/lib/rbcodec/codecs/libopus/opus.c
+++ b/lib/rbcodec/codecs/libopus/opus.c
@@ -168,6 +168,27 @@
}
}
+int opus_packet_get_samples_per_frame(const unsigned char *data, /* only data[0] is read */
+ opus_int32 Fs) /* sampling rate in Hz; the result is a sample count at this rate */
+{
+ int audiosize;
+ if (data[0]&0x80) /* top bit set: 2.5 ms base duration */
+ {
+ audiosize = ((data[0]>>3)&0x3); /* 2-bit duration code from bits 3-4 */
+ audiosize = (Fs<<audiosize)/400; /* Fs/400 is 2.5 ms; each code step doubles it: 2.5, 5, 10, 20 ms */
+ } else if ((data[0]&0x60) == 0x60) /* top bit clear, bits 5 and 6 both set */
+ {
+ audiosize = (data[0]&0x08) ? Fs/50 : Fs/100; /* bit 3 selects 20 ms, otherwise 10 ms */
+ } else {
+ audiosize = ((data[0]>>3)&0x3); /* 2-bit duration code from bits 3-4 */
+ if (audiosize == 3)
+ audiosize = Fs*60/1000; /* code 3 is 60 ms, which the doubling below cannot produce */
+ else
+ audiosize = (Fs<<audiosize)/100; /* Fs/100 is 10 ms; codes 0-2 give 10, 20, 40 ms */
+ }
+ return audiosize; /* samples per frame at rate Fs */
+}
+
int opus_packet_parse_impl(const unsigned char *data, opus_int32 len,
int self_delimited, unsigned char *out_toc,
const unsigned char *frames[48], opus_int16 size[48],
diff --git a/lib/rbcodec/codecs/libopus/opus_decoder.c b/lib/rbcodec/codecs/libopus/opus_decoder.c
index 198d168..5d35ed2 100644
--- a/lib/rbcodec/codecs/libopus/opus_decoder.c
+++ b/lib/rbcodec/codecs/libopus/opus_decoder.c
@@ -77,12 +77,6 @@
opus_uint32 rangeFinal;
};
-#ifdef FIXED_POINT
-static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
- return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x;
-}
-#endif
-
int opus_decoder_get_size(int channels)
{
@@ -222,7 +216,7 @@
VARDECL(opus_val16, pcm_transition_silk);
int pcm_transition_celt_size;
VARDECL(opus_val16, pcm_transition_celt);
- opus_val16 *pcm_transition = NULL; /* Silence false positive "may be used uninitialized" warning */
+ opus_val16 *pcm_transition=NULL;
int redundant_audio_size;
VARDECL(opus_val16, redundant_audio);
@@ -237,6 +231,7 @@
int F2_5, F5, F10, F20;
const opus_val16 *window;
opus_uint32 redundant_rng = 0;
+ int celt_accum;
ALLOC_STACK;
silk_dec = (char*)st+st->silk_dec_offset;
@@ -302,6 +297,14 @@
}
}
+ /* In fixed-point, we can tell CELT to do the accumulation on top of the
+ SILK PCM buffer. This saves some stack space. */
+#ifdef FIXED_POINT
+ celt_accum = (mode != MODE_CELT_ONLY) && (frame_size >= F10);
+#else
+ celt_accum = 0;
+#endif
+
pcm_transition_silk_size = ALLOC_NONE;
pcm_transition_celt_size = ALLOC_NONE;
if (data!=NULL && st->prev_mode > 0 && (
@@ -332,14 +335,20 @@
}
/* Don't allocate any memory when in CELT-only mode */
- pcm_silk_size = (mode != MODE_CELT_ONLY) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE;
+ pcm_silk_size = (mode != MODE_CELT_ONLY && !celt_accum) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE;
ALLOC(pcm_silk, pcm_silk_size, opus_int16);
/* SILK processing */
if (mode != MODE_CELT_ONLY)
{
int lost_flag, decoded_samples;
- opus_int16 *pcm_ptr = pcm_silk;
+ opus_int16 *pcm_ptr;
+#ifdef FIXED_POINT
+ if (celt_accum)
+ pcm_ptr = pcm;
+ else
+#endif
+ pcm_ptr = pcm_silk;
if (st->prev_mode==MODE_CELT_ONLY)
silk_InitDecoder( silk_dec );
@@ -469,7 +478,7 @@
{
celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0));
celt_decode_with_ec(celt_dec, data+len, redundancy_bytes,
- redundant_audio, F5, NULL);
+ redundant_audio, F5, NULL, 0);
celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng));
}
@@ -484,25 +493,28 @@
celt_decoder_ctl(celt_dec, OPUS_RESET_STATE);
/* Decode CELT */
celt_ret = celt_decode_with_ec(celt_dec, decode_fec ? NULL : data,
- len, pcm, celt_frame_size, &dec);
+ len, pcm, celt_frame_size, &dec, celt_accum);
} else {
unsigned char silence[2] = {0xFF, 0xFF};
- for (i=0;i<frame_size*st->channels;i++)
- pcm[i] = 0;
+ if (!celt_accum)
+ {
+ for (i=0;i<frame_size*st->channels;i++)
+ pcm[i] = 0;
+ }
/* For hybrid -> SILK transitions, we let the CELT MDCT
do a fade-out by decoding a silence frame */
if (st->prev_mode == MODE_HYBRID && !(redundancy && celt_to_silk && st->prev_redundancy) )
{
celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0));
- celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL);
+ celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL, celt_accum);
}
}
- if (mode != MODE_CELT_ONLY)
+ if (mode != MODE_CELT_ONLY && !celt_accum)
{
#ifdef FIXED_POINT
for (i=0;i<frame_size*st->channels;i++)
- pcm[i] = SAT16(pcm[i] + pcm_silk[i]);
+ pcm[i] = SAT16(ADD32(pcm[i], pcm_silk[i]));
#else
for (i=0;i<frame_size*st->channels;i++)
pcm[i] = pcm[i] + (opus_val16)((1.f/32768.f)*pcm_silk[i]);
@@ -521,7 +533,7 @@
celt_decoder_ctl(celt_dec, OPUS_RESET_STATE);
celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0));
- celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL);
+ celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL, 0);
celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng));
smooth_fade(pcm+st->channels*(frame_size-F2_5), redundant_audio+st->channels*F2_5,
pcm+st->channels*(frame_size-F2_5), F2_5, st->channels, window, st->Fs);
@@ -717,6 +729,7 @@
{
VARDECL(opus_int16, out);
int ret, i;
+ int nb_samples;
ALLOC_STACK;
if(frame_size<=0)
@@ -724,6 +737,14 @@
RESTORE_STACK;
return OPUS_BAD_ARG;
}
+ if (data != NULL && len > 0 && !decode_fec) /* a real packet on the normal (non-FEC) path */
+ {
+ nb_samples = opus_decoder_get_nb_samples(st, data, len);
+ if (nb_samples<=0) { /* no positive sample count for this packet: reject it */
+ RESTORE_STACK; return OPUS_INVALID_PACKET; /* unwind ALLOC_STACK first, as the frame_size<=0 exit does */
+ }
+ frame_size = IMIN(frame_size, nb_samples); /* cap the request, which also shrinks the ALLOC(out, ...) that follows */
+ }
ALLOC(out, frame_size*st->channels, opus_int16);
ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 0);
@@ -744,6 +765,7 @@
{
VARDECL(float, out);
int ret, i;
+ int nb_samples;
ALLOC_STACK;
if(frame_size<=0)
@@ -752,6 +774,14 @@
return OPUS_BAD_ARG;
}
+ if (data != NULL && len > 0 && !decode_fec) /* a real packet on the normal (non-FEC) path */
+ {
+ nb_samples = opus_decoder_get_nb_samples(st, data, len);
+ if (nb_samples<=0) { /* no positive sample count for this packet: reject it */
+ RESTORE_STACK; return OPUS_INVALID_PACKET; /* unwind ALLOC_STACK first, as the frame_size<=0 exit does */
+ }
+ frame_size = IMIN(frame_size, nb_samples); /* cap the request, which also shrinks the ALLOC(out, ...) that follows */
+ }
ALLOC(out, frame_size*st->channels, float);
ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 1);
@@ -911,27 +941,6 @@
return bandwidth;
}
-int opus_packet_get_samples_per_frame(const unsigned char *data,
- opus_int32 Fs)
-{
- int audiosize;
- if (data[0]&0x80)
- {
- audiosize = ((data[0]>>3)&0x3);
- audiosize = (Fs<<audiosize)/400;
- } else if ((data[0]&0x60) == 0x60)
- {
- audiosize = (data[0]&0x08) ? Fs/50 : Fs/100;
- } else {
- audiosize = ((data[0]>>3)&0x3);
- if (audiosize == 3)
- audiosize = Fs*60/1000;
- else
- audiosize = (Fs<<audiosize)/100;
- }
- return audiosize;
-}
-
int opus_packet_get_nb_channels(const unsigned char *data)
{
return (data[0]&0x4) ? 2 : 1;
diff --git a/lib/rbcodec/codecs/libopus/opus_defines.h b/lib/rbcodec/codecs/libopus/opus_defines.h
index 265089f..84df7c7 100644
--- a/lib/rbcodec/codecs/libopus/opus_defines.h
+++ b/lib/rbcodec/codecs/libopus/opus_defines.h
@@ -454,14 +454,6 @@
* @hideinitializer */
#define OPUS_GET_APPLICATION(x) OPUS_GET_APPLICATION_REQUEST, __opus_check_int_ptr(x)
-/** Gets the sampling rate the encoder or decoder was initialized with.
- * This simply returns the <code>Fs</code> value passed to opus_encoder_init()
- * or opus_decoder_init().
- * @param[out] x <tt>opus_int32 *</tt>: Sampling rate of encoder or decoder.
- * @hideinitializer
- */
-#define OPUS_GET_SAMPLE_RATE(x) OPUS_GET_SAMPLE_RATE_REQUEST, __opus_check_int_ptr(x)
-
/** Gets the total samples of delay added by the entire codec.
* This can be queried by the encoder and then the provided number of samples can be
* skipped on from the start of the decoder's output to provide time aligned input
@@ -545,11 +537,6 @@
* @hideinitializer */
#define OPUS_GET_LSB_DEPTH(x) OPUS_GET_LSB_DEPTH_REQUEST, __opus_check_int_ptr(x)
-/** Gets the duration (in samples) of the last packet successfully decoded or concealed.
- * @param[out] x <tt>opus_int32 *</tt>: Number of samples (at current sampling rate).
- * @hideinitializer */
-#define OPUS_GET_LAST_PACKET_DURATION(x) OPUS_GET_LAST_PACKET_DURATION_REQUEST, __opus_check_int_ptr(x)
-
/** Configures the encoder's use of variable duration frames.
* When variable duration is enabled, the encoder is free to use a shorter frame
* size than the one requested in the opus_encode*() call.
@@ -649,18 +636,6 @@
* @hideinitializer */
#define OPUS_GET_FINAL_RANGE(x) OPUS_GET_FINAL_RANGE_REQUEST, __opus_check_uint_ptr(x)
-/** Gets the pitch of the last decoded frame, if available.
- * This can be used for any post-processing algorithm requiring the use of pitch,
- * e.g. time stretching/shortening. If the last frame was not voiced, or if the
- * pitch was not coded in the frame, then zero is returned.
- *
- * This CTL is only implemented for decoder instances.
- *
- * @param[out] x <tt>opus_int32 *</tt>: pitch period at 48 kHz (or 0 if not available)
- *
- * @hideinitializer */
-#define OPUS_GET_PITCH(x) OPUS_GET_PITCH_REQUEST, __opus_check_int_ptr(x)
-
/** Gets the encoder's configured bandpass or the decoder's last bandpass.
* @see OPUS_SET_BANDWIDTH
* @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
@@ -675,6 +650,14 @@
* @hideinitializer */
#define OPUS_GET_BANDWIDTH(x) OPUS_GET_BANDWIDTH_REQUEST, __opus_check_int_ptr(x)
+/** Gets the sampling rate the encoder or decoder was initialized with.
+ * This simply returns the <code>Fs</code> value passed to opus_encoder_init()
+ * or opus_decoder_init().
+ * @param[out] x <tt>opus_int32 *</tt>: Sampling rate of encoder or decoder.
+ * @hideinitializer
+ */
+#define OPUS_GET_SAMPLE_RATE(x) OPUS_GET_SAMPLE_RATE_REQUEST, __opus_check_int_ptr(x)
+
/**@}*/
/** @defgroup opus_decoderctls Decoder related CTLs
@@ -699,6 +682,23 @@
* @hideinitializer */
#define OPUS_GET_GAIN(x) OPUS_GET_GAIN_REQUEST, __opus_check_int_ptr(x)
+/** Gets the duration (in samples) of the last packet successfully decoded or concealed.
+ * @param[out] x <tt>opus_int32 *</tt>: Number of samples (at current sampling rate).
+ * @hideinitializer */
+#define OPUS_GET_LAST_PACKET_DURATION(x) OPUS_GET_LAST_PACKET_DURATION_REQUEST, __opus_check_int_ptr(x)
+
+/** Gets the pitch of the last decoded frame, if available.
+ * This can be used for any post-processing algorithm requiring the use of pitch,
+ * e.g. time stretching/shortening. If the last frame was not voiced, or if the
+ * pitch was not coded in the frame, then zero is returned.
+ *
+ * This CTL is only implemented for decoder instances.
+ *
+ * @param[out] x <tt>opus_int32 *</tt>: pitch period at 48 kHz (or 0 if not available)
+ *
+ * @hideinitializer */
+#define OPUS_GET_PITCH(x) OPUS_GET_PITCH_REQUEST, __opus_check_int_ptr(x)
+
/**@}*/
/** @defgroup opus_libinfo Opus library information functions
diff --git a/lib/rbcodec/codecs/libopus/opus_private.h b/lib/rbcodec/codecs/libopus/opus_private.h
index 83225f2..3177f52 100644
--- a/lib/rbcodec/codecs/libopus/opus_private.h
+++ b/lib/rbcodec/codecs/libopus/opus_private.h
@@ -86,10 +86,6 @@
void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
-int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
- int bitrate, opus_val16 tonality, float *mem, int buffering,
- downmix_func downmix);
-
int encode_size(int size, unsigned char *data);
opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs);
@@ -104,7 +100,8 @@
opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
unsigned char *data, opus_int32 out_data_bytes, int lsb_depth,
- const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix);
+ const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2,
+ int analysis_channels, downmix_func downmix, int float_api);
int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len,
opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited,
diff --git a/lib/rbcodec/codecs/libopus/silk/CNG.c b/lib/rbcodec/codecs/libopus/silk/CNG.c
index 8481d95..bb30a7c 100644
--- a/lib/rbcodec/codecs/libopus/silk/CNG.c
+++ b/lib/rbcodec/codecs/libopus/silk/CNG.c
@@ -34,7 +34,7 @@
/* Generates excitation for CNG LPC synthesis */
static OPUS_INLINE void silk_CNG_exc(
- opus_int32 residual_Q10[], /* O CNG residual signal Q10 */
+ opus_int32 exc_Q10[], /* O CNG excitation signal Q10 */
opus_int32 exc_buf_Q14[], /* I Random samples buffer Q10 */
opus_int32 Gain_Q16, /* I Gain to apply */
opus_int length, /* I Length */
@@ -55,7 +55,7 @@
idx = (opus_int)( silk_RSHIFT( seed, 24 ) & exc_mask );
silk_assert( idx >= 0 );
silk_assert( idx <= CNG_BUF_MASK_MAX );
- residual_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) );
+ exc_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) );
}
*rand_seed = seed;
}
@@ -85,7 +85,7 @@
)
{
opus_int i, subfr;
- opus_int32 sum_Q6, max_Gain_Q16;
+ opus_int32 sum_Q6, max_Gain_Q16, gain_Q16;
opus_int16 A_Q12[ MAX_LPC_ORDER ];
silk_CNG_struct *psCNG = &psDec->sCNG;
SAVE_STACK;
@@ -125,11 +125,20 @@
/* Add CNG when packet is lost or during DTX */
if( psDec->lossCnt ) {
VARDECL( opus_int32, CNG_sig_Q10 );
-
ALLOC( CNG_sig_Q10, length + MAX_LPC_ORDER, opus_int32 );
/* Generate CNG excitation */
- silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, psCNG->CNG_smth_Gain_Q16, length, &psCNG->rand_seed );
+ gain_Q16 = silk_SMULWW( psDec->sPLC.randScale_Q14, psDec->sPLC.prevGain_Q16[1] );
+ if( gain_Q16 >= (1 << 21) || psCNG->CNG_smth_Gain_Q16 > (1 << 23) ) {
+ gain_Q16 = silk_SMULTT( gain_Q16, gain_Q16 );
+ gain_Q16 = silk_SUB_LSHIFT32(silk_SMULTT( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 );
+ gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 16 );
+ } else {
+ gain_Q16 = silk_SMULWW( gain_Q16, gain_Q16 );
+ gain_Q16 = silk_SUB_LSHIFT32(silk_SMULWW( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 );
+ gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 8 );
+ }
+ silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, gain_Q16, length, &psCNG->rand_seed );
/* Convert CNG NLSF to filter representation */
silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order );
@@ -162,7 +171,7 @@
/* Update states */
CNG_sig_Q10[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q10[ MAX_LPC_ORDER + i ], sum_Q6, 4 );
- frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( sum_Q6, 6 ) );
+ frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( CNG_sig_Q10[ MAX_LPC_ORDER + i ], 10 ) );
}
silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q10[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
} else {
diff --git a/lib/rbcodec/codecs/libopus/silk/PLC.c b/lib/rbcodec/codecs/libopus/silk/PLC.c
index 01f4001..8b0a8fe 100644
--- a/lib/rbcodec/codecs/libopus/silk/PLC.c
+++ b/lib/rbcodec/codecs/libopus/silk/PLC.c
@@ -165,6 +165,30 @@
psPLC->nb_subfr = psDec->nb_subfr;
}
+static OPUS_INLINE void silk_PLC_energy(opus_int32 *energy1, opus_int *shift1, opus_int32 *energy2, opus_int *shift2,
+ const opus_int32 *exc_Q14, const opus_int32 *prevGain_Q10, int subfr_length, int nb_subfr)
+{
+ int i, k;
+ VARDECL( opus_int16, exc_buf );
+ opus_int16 *exc_buf_ptr;
+ SAVE_STACK;
+ ALLOC( exc_buf, 2*subfr_length, opus_int16 );
+ /* Find random noise component: measure subframes nb_subfr-2 and nb_subfr-1 of exc_Q14 (k = 0 and 1) */
+ /* Scale previous excitation signal by prevGain_Q10[ k ], shift right by 8 and saturate to 16 bits */
+ exc_buf_ptr = exc_buf;
+ for( k = 0; k < 2; k++ ) {
+ for( i = 0; i < subfr_length; i++ ) {
+ exc_buf_ptr[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT(
+ silk_SMULWW( exc_Q14[ i + ( k + nb_subfr - 2 ) * subfr_length ], prevGain_Q10[ k ] ), 8 ) );
+ }
+ exc_buf_ptr += subfr_length;
+ }
+ /* Output energy/shift of each scaled subframe; the caller compares them to find the one with lowest energy, used as random noise generator */
+ silk_sum_sqr_shift( energy1, shift1, exc_buf, subfr_length );
+ silk_sum_sqr_shift( energy2, shift2, &exc_buf[ subfr_length ], subfr_length );
+ RESTORE_STACK;
+}
+
static OPUS_INLINE void silk_PLC_conceal(
silk_decoder_state *psDec, /* I/O Decoder state */
silk_decoder_control *psDecCtrl, /* I/O Decoder control */
@@ -177,19 +201,26 @@
opus_int32 energy1, energy2, *rand_ptr, *pred_lag_ptr;
opus_int32 LPC_pred_Q10, LTP_pred_Q12;
opus_int16 rand_scale_Q14;
- opus_int16 *B_Q14, *exc_buf_ptr;
+ opus_int16 *B_Q14;
opus_int32 *sLPC_Q14_ptr;
- VARDECL( opus_int16, exc_buf );
opus_int16 A_Q12[ MAX_LPC_ORDER ];
+#ifdef SMALL_FOOTPRINT
+ opus_int16 *sLTP;
+#else
VARDECL( opus_int16, sLTP );
+#endif
VARDECL( opus_int32, sLTP_Q14 );
silk_PLC_struct *psPLC = &psDec->sPLC;
opus_int32 prevGain_Q10[2];
SAVE_STACK;
- ALLOC( exc_buf, 2*psPLC->subfr_length, opus_int16 );
- ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 );
ALLOC( sLTP_Q14, psDec->ltp_mem_length + psDec->frame_length, opus_int32 );
+#ifdef SMALL_FOOTPRINT
+ /* Ugly hack that breaks aliasing rules to save stack: put sLTP at the very end of sLTP_Q14. */
+ sLTP = ((opus_int16*)&sLTP_Q14[psDec->ltp_mem_length + psDec->frame_length])-psDec->ltp_mem_length;
+#else
+ ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 );
+#endif
prevGain_Q10[0] = silk_RSHIFT( psPLC->prevGain_Q16[ 0 ], 6);
prevGain_Q10[1] = silk_RSHIFT( psPLC->prevGain_Q16[ 1 ], 6);
@@ -198,19 +229,7 @@
silk_memset( psPLC->prevLPC_Q12, 0, sizeof( psPLC->prevLPC_Q12 ) );
}
- /* Find random noise component */
- /* Scale previous excitation signal */
- exc_buf_ptr = exc_buf;
- for( k = 0; k < 2; k++ ) {
- for( i = 0; i < psPLC->subfr_length; i++ ) {
- exc_buf_ptr[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT(
- silk_SMULWW( psDec->exc_Q14[ i + ( k + psPLC->nb_subfr - 2 ) * psPLC->subfr_length ], prevGain_Q10[ k ] ), 8 ) );
- }
- exc_buf_ptr += psPLC->subfr_length;
- }
- /* Find the subframe with lowest energy of the last two and use that as random noise generator */
- silk_sum_sqr_shift( &energy1, &shift1, exc_buf, psPLC->subfr_length );
- silk_sum_sqr_shift( &energy2, &shift2, &exc_buf[ psPLC->subfr_length ], psPLC->subfr_length );
+ silk_PLC_energy(&energy1, &shift1, &energy2, &shift2, psDec->exc_Q14, prevGain_Q10, psDec->subfr_length, psDec->nb_subfr);
if( silk_RSHIFT( energy1, shift2 ) < silk_RSHIFT( energy2, shift1 ) ) {
/* First sub-frame has lowest energy */
diff --git a/lib/rbcodec/codecs/libopus/silk/SigProc_FIX.h b/lib/rbcodec/codecs/libopus/silk/SigProc_FIX.h
index 1b58057..4be0985 100644
--- a/lib/rbcodec/codecs/libopus/silk/SigProc_FIX.h
+++ b/lib/rbcodec/codecs/libopus/silk/SigProc_FIX.h
@@ -587,6 +587,11 @@
#include "arm/SigProc_FIX_armv5e.h"
#endif
+#if defined(MIPSr1_ASM)
+#include "mips/sigproc_fix_mipsr1.h"
+#endif
+
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/rbcodec/codecs/libopus/silk/code_signs.c b/lib/rbcodec/codecs/libopus/silk/code_signs.c
index 561043c..6ac25cb 100644
--- a/lib/rbcodec/codecs/libopus/silk/code_signs.c
+++ b/lib/rbcodec/codecs/libopus/silk/code_signs.c
@@ -76,7 +76,7 @@
/* Decodes signs of excitation */
void silk_decode_signs(
ec_dec *psRangeDec, /* I/O Compressor data structure */
- opus_int pulses[], /* I/O pulse signal */
+ opus_int16 pulses[], /* I/O pulse signal */
opus_int length, /* I length of input */
const opus_int signalType, /* I Signal type */
const opus_int quantOffsetType, /* I Quantization offset type */
@@ -85,7 +85,7 @@
{
opus_int i, j, p;
opus_uint8 icdf[ 2 ];
- opus_int *q_ptr;
+ opus_int16 *q_ptr;
const opus_uint8 *icdf_ptr;
icdf[ 1 ] = 0;
diff --git a/lib/rbcodec/codecs/libopus/silk/dec_API.c b/lib/rbcodec/codecs/libopus/silk/dec_API.c
index 4cbcf71..1087c67 100644
--- a/lib/rbcodec/codecs/libopus/silk/dec_API.c
+++ b/lib/rbcodec/codecs/libopus/silk/dec_API.c
@@ -31,6 +31,7 @@
#include "API.h"
#include "main.h"
#include "stack_alloc.h"
+#include "os_support.h"
/************************/
/* Decoder Super Struct */
@@ -90,7 +91,8 @@
opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR;
opus_int32 nSamplesOutDec, LBRR_symbol;
opus_int16 *samplesOut1_tmp[ 2 ];
- VARDECL( opus_int16, samplesOut1_tmp_storage );
+ VARDECL( opus_int16, samplesOut1_tmp_storage1 );
+ VARDECL( opus_int16, samplesOut1_tmp_storage2 );
VARDECL( opus_int16, samplesOut2_tmp );
opus_int32 MS_pred_Q13[ 2 ] = { 0 };
opus_int16 *resample_out_ptr;
@@ -98,6 +100,7 @@
silk_decoder_state *channel_state = psDec->channel_state;
opus_int has_side;
opus_int stereo_to_mono;
+ int delay_stack_alloc;
SAVE_STACK;
silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 );
@@ -196,7 +199,7 @@
for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) {
for( n = 0; n < decControl->nChannelsInternal; n++ ) {
if( channel_state[ n ].LBRR_flags[ i ] ) {
- opus_int pulses[ MAX_FRAME_LENGTH ];
+ opus_int16 pulses[ MAX_FRAME_LENGTH ];
opus_int condCoding;
if( decControl->nChannelsInternal == 2 && n == 0 ) {
@@ -251,13 +254,22 @@
psDec->channel_state[ 1 ].first_frame_after_reset = 1;
}
- ALLOC( samplesOut1_tmp_storage,
- decControl->nChannelsInternal*(
- channel_state[ 0 ].frame_length + 2 ),
+ /* Check if the temp buffer fits into the output PCM buffer. If it fits,
+ we can delay allocating the temp buffer until after the SILK peak stack
+ usage. We need to use a < and not a <= because of the two extra samples. */
+ delay_stack_alloc = decControl->internalSampleRate*decControl->nChannelsInternal
+ < decControl->API_sampleRate*decControl->nChannelsAPI;
+ ALLOC( samplesOut1_tmp_storage1, delay_stack_alloc ? ALLOC_NONE
+ : decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ),
opus_int16 );
- samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage;
- samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage
- + channel_state[ 0 ].frame_length + 2;
+ if ( delay_stack_alloc )
+ {
+ samplesOut1_tmp[ 0 ] = samplesOut;
+ samplesOut1_tmp[ 1 ] = samplesOut + channel_state[ 0 ].frame_length + 2;
+ } else {
+ samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1;
+ samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2;
+ }
if( lostFlag == FLAG_DECODE_NORMAL ) {
has_side = !decode_only_middle;
@@ -312,6 +324,15 @@
resample_out_ptr = samplesOut;
}
+ ALLOC( samplesOut1_tmp_storage2, delay_stack_alloc
+ ? decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 )
+ : ALLOC_NONE,
+ opus_int16 );
+ if ( delay_stack_alloc ) {
+ OPUS_COPY(samplesOut1_tmp_storage2, samplesOut, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2));
+ samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage2;
+ samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage2 + channel_state[ 0 ].frame_length + 2;
+ }
for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
/* Resample decoded signal to API_sampleRate */
diff --git a/lib/rbcodec/codecs/libopus/silk/decode_core.c b/lib/rbcodec/codecs/libopus/silk/decode_core.c
index 87fbd5d..af68b75 100644
--- a/lib/rbcodec/codecs/libopus/silk/decode_core.c
+++ b/lib/rbcodec/codecs/libopus/silk/decode_core.c
@@ -39,7 +39,7 @@
silk_decoder_state *psDec, /* I/O Decoder state */
silk_decoder_control *psDecCtrl, /* I Decoder control */
opus_int16 xq[], /* O Decoded speech */
- const opus_int pulses[ MAX_FRAME_LENGTH ] /* I Pulse signal */
+ const opus_int16 pulses[ MAX_FRAME_LENGTH ] /* I Pulse signal */
)
{
opus_int i, k, lag = 0, start_idx, sLTP_buf_idx, NLSF_interpolation_flag, signalType;
@@ -49,7 +49,7 @@
opus_int32 LTP_pred_Q13, LPC_pred_Q10, Gain_Q10, inv_gain_Q31, gain_adj_Q16, rand_seed, offset_Q10;
opus_int32 *pred_lag_ptr, *pexc_Q14, *pres_Q14;
VARDECL( opus_int32, res_Q14 );
-/* VARDECL( opus_int32, sLPC_Q14 ); */
+ VARDECL( opus_int32, sLPC_Q14 );
SAVE_STACK;
silk_assert( psDec->prev_gain_Q16 != 0 );
@@ -57,8 +57,7 @@
ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 );
ALLOC( sLTP_Q15, psDec->ltp_mem_length + psDec->frame_length, opus_int32 );
ALLOC( res_Q14, psDec->subfr_length, opus_int32 );
-/* ALLOC( sLPC_Q14, psDec->subfr_length + MAX_LPC_ORDER, opus_int32 ); */
- opus_int32 sLPC_Q14[psDec->subfr_length + MAX_LPC_ORDER]; /* worst case is 80 + 16 */
+ ALLOC( sLPC_Q14, psDec->subfr_length + MAX_LPC_ORDER, opus_int32 );
offset_Q10 = silk_Quantization_Offsets_Q10[ psDec->indices.signalType >> 1 ][ psDec->indices.quantOffsetType ];
diff --git a/lib/rbcodec/codecs/libopus/silk/decode_frame.c b/lib/rbcodec/codecs/libopus/silk/decode_frame.c
index abc00a3..6a7cffb 100644
--- a/lib/rbcodec/codecs/libopus/silk/decode_frame.c
+++ b/lib/rbcodec/codecs/libopus/silk/decode_frame.c
@@ -47,13 +47,10 @@
{
VARDECL( silk_decoder_control, psDecCtrl );
opus_int L, mv_len, ret = 0;
- VARDECL( opus_int, pulses );
SAVE_STACK;
L = psDec->frame_length;
ALLOC( psDecCtrl, 1, silk_decoder_control );
- ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) &
- ~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int );
psDecCtrl->LTP_scale_Q14 = 0;
/* Safety checks */
@@ -62,6 +59,9 @@
if( lostFlag == FLAG_DECODE_NORMAL ||
( lostFlag == FLAG_DECODE_LBRR && psDec->LBRR_flags[ psDec->nFramesDecoded ] == 1 ) )
{
+ VARDECL( opus_int16, pulses );
+ ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) &
+ ~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int16 );
/*********************************************/
/* Decode quantization indices of side info */
/*********************************************/
@@ -107,16 +107,16 @@
silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) );
silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) );
- /****************************************************************/
- /* Ensure smooth connection of extrapolated and good frames */
- /****************************************************************/
- silk_PLC_glue_frames( psDec, pOut, L );
-
/************************************************/
/* Comfort noise generation / estimation */
/************************************************/
silk_CNG( psDec, psDecCtrl, pOut, L );
+ /****************************************************************/
+ /* Ensure smooth connection of extrapolated and good frames */
+ /****************************************************************/
+ silk_PLC_glue_frames( psDec, pOut, L );
+
/* Update some decoder state variables */
psDec->lagPrev = psDecCtrl->pitchL[ psDec->nb_subfr - 1 ];
diff --git a/lib/rbcodec/codecs/libopus/silk/decode_pulses.c b/lib/rbcodec/codecs/libopus/silk/decode_pulses.c
index e8a87c2..1e14bc3 100644
--- a/lib/rbcodec/codecs/libopus/silk/decode_pulses.c
+++ b/lib/rbcodec/codecs/libopus/silk/decode_pulses.c
@@ -36,7 +36,7 @@
/*********************************************/
void silk_decode_pulses(
ec_dec *psRangeDec, /* I/O Compressor data structure */
- opus_int pulses[], /* O Excitation signal */
+ opus_int16 pulses[], /* O Excitation signal */
const opus_int signalType, /* I Sigtype */
const opus_int quantOffsetType, /* I quantOffsetType */
const opus_int frame_length /* I Frame length */
@@ -44,7 +44,7 @@
{
opus_int i, j, k, iter, abs_q, nLS, RateLevelIndex;
opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ], nLshifts[ MAX_NB_SHELL_BLOCKS ];
- opus_int *pulses_ptr;
+ opus_int16 *pulses_ptr;
const opus_uint8 *cdf_ptr;
/*********************/
@@ -84,7 +84,7 @@
if( sum_pulses[ i ] > 0 ) {
silk_shell_decoder( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], psRangeDec, sum_pulses[ i ] );
} else {
- silk_memset( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof( opus_int ) );
+ silk_memset( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof( pulses[0] ) );
}
}
diff --git a/lib/rbcodec/codecs/libopus/silk/macros.h b/lib/rbcodec/codecs/libopus/silk/macros.h
index 482dc3c..05623b5 100644
--- a/lib/rbcodec/codecs/libopus/silk/macros.h
+++ b/lib/rbcodec/codecs/libopus/silk/macros.h
@@ -79,17 +79,24 @@
(( (a) & ((b)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a)-(b)) : \
((((a)^0x80000000) & (b) & 0x80000000) ? silk_int32_MAX : (a)-(b)) )
-#include "ecintrin.h"
+#if defined(MIPSr1_ASM)
+#include "mips/macros_mipsr1.h"
+#endif
+#include "ecintrin.h"
+#ifndef OVERRIDE_silk_CLZ16
static OPUS_INLINE opus_int32 silk_CLZ16(opus_int16 in16)
{
return 32 - EC_ILOG(in16<<16|0x8000);
}
+#endif
+#ifndef OVERRIDE_silk_CLZ32
static OPUS_INLINE opus_int32 silk_CLZ32(opus_int32 in32)
{
return in32 ? 32 - EC_ILOG(in32) : 32;
}
+#endif
/* Row based */
#define matrix_ptr(Matrix_base_adr, row, column, N) \
diff --git a/lib/rbcodec/codecs/libopus/silk/main.h b/lib/rbcodec/codecs/libopus/silk/main.h
index 2bdf897..77524f5 100644
--- a/lib/rbcodec/codecs/libopus/silk/main.h
+++ b/lib/rbcodec/codecs/libopus/silk/main.h
@@ -116,7 +116,7 @@
/* Decodes signs of excitation */
void silk_decode_signs(
ec_dec *psRangeDec, /* I/O Compressor data structure */
- opus_int pulses[], /* I/O pulse signal */
+ opus_int16 pulses[], /* I/O pulse signal */
opus_int length, /* I length of input */
const opus_int signalType, /* I Signal type */
const opus_int quantOffsetType, /* I Quantization offset type */
@@ -161,7 +161,7 @@
/* Shell decoder, operates on one shell code frame of 16 pulses */
void silk_shell_decoder(
- opus_int *pulses0, /* O data: nonnegative pulse amplitudes */
+ opus_int16 *pulses0, /* O data: nonnegative pulse amplitudes */
ec_dec *psRangeDec, /* I/O Compressor data structure */
const opus_int pulses4 /* I number of pulses per pulse-subframe */
);
@@ -397,13 +397,13 @@
silk_decoder_state *psDec, /* I/O Decoder state */
silk_decoder_control *psDecCtrl, /* I Decoder control */
opus_int16 xq[], /* O Decoded speech */
- const opus_int pulses[ MAX_FRAME_LENGTH ] /* I Pulse signal */
+ const opus_int16 pulses[ MAX_FRAME_LENGTH ] /* I Pulse signal */
);
/* Decode quantization indices of excitation (Shell coding) */
void silk_decode_pulses(
ec_dec *psRangeDec, /* I/O Compressor data structure */
- opus_int pulses[], /* O Excitation signal */
+ opus_int16 pulses[], /* O Excitation signal */
const opus_int signalType, /* I Sigtype */
const opus_int quantOffsetType, /* I quantOffsetType */
const opus_int frame_length /* I Frame length */
diff --git a/lib/rbcodec/codecs/libopus/silk/resampler_private_IIR_FIR.c b/lib/rbcodec/codecs/libopus/silk/resampler_private_IIR_FIR.c
index c7b4f6e..6b2b3a2 100644
--- a/lib/rbcodec/codecs/libopus/silk/resampler_private_IIR_FIR.c
+++ b/lib/rbcodec/codecs/libopus/silk/resampler_private_IIR_FIR.c
@@ -72,13 +72,10 @@
silk_resampler_state_struct *S = (silk_resampler_state_struct *)SS;
opus_int32 nSamplesIn;
opus_int32 max_index_Q16, index_increment_Q16;
-/* VARDECL( opus_int16, buf );
- SAVE_STACK; */
+ VARDECL( opus_int16, buf );
+ SAVE_STACK;
-/* ALLOC( buf, 2 * S->batchSize + RESAMPLER_ORDER_FIR_12, opus_int16 ); */
-
- /* worst case = 2*16*10+8 = 328 * 2 = 656bytes */
- opus_int16 buf[2 * S->batchSize + RESAMPLER_ORDER_FIR_12];
+ ALLOC( buf, 2 * S->batchSize + RESAMPLER_ORDER_FIR_12, opus_int16 );
/* Copy buffered samples to start of buffer */
silk_memcpy( buf, S->sFIR.i16, RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) );
@@ -106,5 +103,5 @@
/* Copy last part of filtered signal to the state for the next call */
silk_memcpy( S->sFIR.i16, &buf[ nSamplesIn << 1 ], RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) );
-/* RESTORE_STACK; */
+ RESTORE_STACK;
}
diff --git a/lib/rbcodec/codecs/libopus/silk/shell_coder.c b/lib/rbcodec/codecs/libopus/silk/shell_coder.c
index 9d6e1bb..d80dd51 100644
--- a/lib/rbcodec/codecs/libopus/silk/shell_coder.c
+++ b/lib/rbcodec/codecs/libopus/silk/shell_coder.c
@@ -60,8 +60,8 @@
#endif
static OPUS_INLINE void decode_split(
- opus_int *p_child1, /* O pulse amplitude of first child subframe */
- opus_int *p_child2, /* O pulse amplitude of second child subframe */
+ opus_int16 *p_child1, /* O pulse amplitude of first child subframe */
+ opus_int16 *p_child2, /* O pulse amplitude of second child subframe */
ec_dec *psRangeDec, /* I/O Compressor data structure */
const opus_int p, /* I pulse amplitude of current subframe */
const opus_uint8 *shell_table /* I table of shell cdfs */
@@ -121,12 +121,12 @@
/* Shell decoder, operates on one shell code frame of 16 pulses */
void silk_shell_decoder(
- opus_int *pulses0, /* O data: nonnegative pulse amplitudes */
+ opus_int16 *pulses0, /* O data: nonnegative pulse amplitudes */
ec_dec *psRangeDec, /* I/O Compressor data structure */
const opus_int pulses4 /* I number of pulses per pulse-subframe */
)
{
- opus_int pulses3[ 2 ], pulses2[ 4 ], pulses1[ 8 ];
+ opus_int16 pulses3[ 2 ], pulses2[ 4 ], pulses1[ 8 ];
/* this function operates on one shell code frame of 16 pulses */
silk_assert( SHELL_CODEC_FRAME_LENGTH == 16 );
diff --git a/lib/rbcodec/codecs/libopus/silk/sum_sqr_shift.c b/lib/rbcodec/codecs/libopus/silk/sum_sqr_shift.c
index 12514c9..129df19 100644
--- a/lib/rbcodec/codecs/libopus/silk/sum_sqr_shift.c
+++ b/lib/rbcodec/codecs/libopus/silk/sum_sqr_shift.c
@@ -53,6 +53,7 @@
/* Scale down */
nrg = (opus_int32)silk_RSHIFT_uint( (opus_uint32)nrg, 2 );
shft = 2;
+ i+=2;
break;
}
}
diff --git a/lib/rbcodec/codecs/opus.c b/lib/rbcodec/codecs/opus.c
index 2c495aa..842803a 100644
--- a/lib/rbcodec/codecs/opus.c
+++ b/lib/rbcodec/codecs/opus.c
@@ -337,8 +337,6 @@
param = ci->id3->elapsed;
strtoffset = ci->id3->offset;
- global_stack = 0;
-
#if defined(CPU_COLDFIRE)
/* EMAC rounding is disabled because of MULT16_32_Q15, which will be
inaccurate with rounding in its current incarnation */