webkit  2cdf99a9e3038c7e01b3c37e8ad903ecbe5eecf1
https://github.com/WebKit/webkit
mdct_mipsr1.h
Go to the documentation of this file.
1 /* Copyright (c) 2007-2008 CSIRO
2  Copyright (c) 2007-2008 Xiph.Org Foundation
3  Written by Jean-Marc Valin */
4 /*
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions
7  are met:
8 
9  - Redistributions of source code must retain the above copyright
10  notice, this list of conditions and the following disclaimer.
11 
12  - Redistributions in binary form must reproduce the above copyright
13  notice, this list of conditions and the following disclaimer in the
14  documentation and/or other materials provided with the distribution.
15 
16  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28 
29 /* This is a simple MDCT implementation that uses a N/4 complex FFT
30  to do most of the work. It should be relatively straightforward to
31  plug in pretty much and FFT here.
32 
33  This replaces the Vorbis FFT (and uses the exact same API), which
34  was a bit too messy and that was ending up duplicating code
35  (might as well use the same FFT everywhere).
36 
37  The algorithm is similar to (and inspired from) Fabrice Bellard's
38  MDCT implementation in FFMPEG, but has differences in signs, ordering
39  and scaling in many places.
40 */
41 #ifndef __MDCT_MIPSR1_H__
42 #define __MDCT_MIPSR1_H__
43 
44 #ifndef SKIP_CONFIG_H
45 #ifdef HAVE_CONFIG_H
46 #include "config.h"
47 #endif
48 #endif
49 
50 #include "mdct.h"
51 #include "kiss_fft.h"
52 #include "_kiss_fft_guts.h"
53 #include <math.h>
54 #include "os_support.h"
55 #include "mathops.h"
56 #include "stack_alloc.h"
57 
58 /* Forward MDCT trashes the input array */
59 #define OVERRIDE_clt_mdct_forward
60 void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
61  const opus_val16 *window, int overlap, int shift, int stride, int arch)
62 {
63  int i;
64  int N, N2, N4;
67  const kiss_fft_state *st = l->kfft[shift];
68  const kiss_twiddle_scalar *trig;
69  opus_val16 scale;
70 #ifdef FIXED_POINT
71  /* Allows us to scale with MULT16_32_Q16(), which is faster than
72  MULT16_32_Q15() on ARM. */
73  int scale_shift = st->scale_shift-1;
74 #endif
75 
76  (void)arch;
77 
78  SAVE_STACK;
79  scale = st->scale;
80 
81  N = l->n;
82  trig = l->trig;
83  for (i=0;i<shift;i++)
84  {
85  N >>= 1;
86  trig += N;
87  }
88  N2 = N>>1;
89  N4 = N>>2;
90 
91  ALLOC(f, N2, kiss_fft_scalar);
92  ALLOC(f2, N4, kiss_fft_cpx);
93 
94  /* Consider the input to be composed of four blocks: [a, b, c, d] */
95  /* Window, shuffle, fold */
96  {
97  /* Temp pointers to make it really clear to the compiler what we're doing */
98  const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
99  const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
100  kiss_fft_scalar * OPUS_RESTRICT yp = f;
101  const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
102  const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
103  for(i=0;i<((overlap+3)>>2);i++)
104  {
105  /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
106  *yp++ = S_MUL_ADD(*wp2, xp1[N2],*wp1,*xp2);
107  *yp++ = S_MUL_SUB(*wp1, *xp1,*wp2, xp2[-N2]);
108  xp1+=2;
109  xp2-=2;
110  wp1+=2;
111  wp2-=2;
112  }
113  wp1 = window;
114  wp2 = window+overlap-1;
115  for(;i<N4-((overlap+3)>>2);i++)
116  {
117  /* Real part arranged as a-bR, Imag part arranged as -c-dR */
118  *yp++ = *xp2;
119  *yp++ = *xp1;
120  xp1+=2;
121  xp2-=2;
122  }
123  for(;i<N4;i++)
124  {
125  /* Real part arranged as a-bR, Imag part arranged as -c-dR */
126  *yp++ = S_MUL_SUB(*wp2, *xp2, *wp1, xp1[-N2]);
127  *yp++ = S_MUL_ADD(*wp2, *xp1, *wp1, xp2[N2]);
128  xp1+=2;
129  xp2-=2;
130  wp1+=2;
131  wp2-=2;
132  }
133  }
134  /* Pre-rotation */
135  {
136  kiss_fft_scalar * OPUS_RESTRICT yp = f;
137  const kiss_twiddle_scalar *t = &trig[0];
138  for(i=0;i<N4;i++)
139  {
140  kiss_fft_cpx yc;
141  kiss_twiddle_scalar t0, t1;
142  kiss_fft_scalar re, im, yr, yi;
143  t0 = t[i];
144  t1 = t[N4+i];
145  re = *yp++;
146  im = *yp++;
147 
148  yr = S_MUL_SUB(re,t0,im,t1);
149  yi = S_MUL_ADD(im,t0,re,t1);
150 
151  yc.r = yr;
152  yc.i = yi;
153  yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift);
154  yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift);
155  f2[st->bitrev[i]] = yc;
156  }
157  }
158 
159  /* N/4 complex FFT, does not downscale anymore */
160  opus_fft_impl(st, f2);
161 
162  /* Post-rotate */
163  {
164  /* Temp pointers to make it really clear to the compiler what we're doing */
165  const kiss_fft_cpx * OPUS_RESTRICT fp = f2;
166  kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
167  kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
168  const kiss_twiddle_scalar *t = &trig[0];
169  /* Temp pointers to make it really clear to the compiler what we're doing */
170  for(i=0;i<N4;i++)
171  {
172  kiss_fft_scalar yr, yi;
173  yr = S_MUL_SUB(fp->i,t[N4+i] , fp->r,t[i]);
174  yi = S_MUL_ADD(fp->r,t[N4+i] ,fp->i,t[i]);
175  *yp1 = yr;
176  *yp2 = yi;
177  fp++;
178  yp1 += 2*stride;
179  yp2 -= 2*stride;
180  }
181  }
183 }
184 
185 #define OVERRIDE_clt_mdct_backward
187  const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch)
188 {
189  int i;
190  int N, N2, N4;
191  const kiss_twiddle_scalar *trig;
192 
193  (void)arch;
194 
195  N = l->n;
196  trig = l->trig;
197  for (i=0;i<shift;i++)
198  {
199  N >>= 1;
200  trig += N;
201  }
202  N2 = N>>1;
203  N4 = N>>2;
204 
205  /* Pre-rotate */
206  {
207  /* Temp pointers to make it really clear to the compiler what we're doing */
208  const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
209  const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
210  kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1);
211  const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
212  const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev;
213  for(i=0;i<N4;i++)
214  {
215  int rev;
216  kiss_fft_scalar yr, yi;
217  rev = *bitrev++;
218  yr = S_MUL_ADD(*xp2, t[i] , *xp1, t[N4+i]);
219  yi = S_MUL_SUB(*xp1, t[i] , *xp2, t[N4+i]);
220  /* We swap real and imag because we use an FFT instead of an IFFT. */
221  yp[2*rev+1] = yr;
222  yp[2*rev] = yi;
223  /* Storing the pre-rotation directly in the bitrev order. */
224  xp1+=2*stride;
225  xp2-=2*stride;
226  }
227  }
228 
229  opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1)));
230 
231  /* Post-rotate and de-shuffle from both ends of the buffer at once to make
232  it in-place. */
233  {
234  kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1);
235  kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2;
236  const kiss_twiddle_scalar *t = &trig[0];
237  /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
238  middle pair will be computed twice. */
239  for(i=0;i<(N4+1)>>1;i++)
240  {
241  kiss_fft_scalar re, im, yr, yi;
242  kiss_twiddle_scalar t0, t1;
243  /* We swap real and imag because we're using an FFT instead of an IFFT. */
244  re = yp0[1];
245  im = yp0[0];
246  t0 = t[i];
247  t1 = t[N4+i];
248  /* We'd scale up by 2 here, but instead it's done when mixing the windows */
249  yr = S_MUL_ADD(re,t0 , im,t1);
250  yi = S_MUL_SUB(re,t1 , im,t0);
251  /* We swap real and imag because we're using an FFT instead of an IFFT. */
252  re = yp1[1];
253  im = yp1[0];
254  yp0[0] = yr;
255  yp1[1] = yi;
256 
257  t0 = t[(N4-i-1)];
258  t1 = t[(N2-i-1)];
259  /* We'd scale up by 2 here, but instead it's done when mixing the windows */
260  yr = S_MUL_ADD(re,t0,im,t1);
261  yi = S_MUL_SUB(re,t1,im,t0);
262  yp1[0] = yr;
263  yp0[1] = yi;
264  yp0 += 2;
265  yp1 -= 2;
266  }
267  }
268 
269  /* Mirror on both sides for TDAC */
270  {
271  kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
272  kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
273  const opus_val16 * OPUS_RESTRICT wp1 = window;
274  const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
275 
276  for(i = 0; i < overlap/2; i++)
277  {
278  kiss_fft_scalar x1, x2;
279  x1 = *xp1;
280  x2 = *yp1;
281  *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
282  *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
283  wp1++;
284  wp2--;
285  }
286  }
287 }
288 #endif /* __MDCT_MIPSR1_H__ */
kiss_fft_scalar i
Definition: kiss_fft.h:67
#define RESTORE_STACK
Definition: stack_alloc.h:148
void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *OPUS_RESTRICT out, const opus_val16 *window, int overlap, int shift, int stride, int arch)
Definition: mdct_mipsr1.h:60
short opus_int16
Definition: opus_types.h:144
bool t
Definition: UpdateContents.py:37
opus_val16 scale
Definition: kiss_fft.h:88
Definition: kiss_fft.h:86
#define MULT16_32_Q15(a, b)
Definition: arch.h:216
#define SAVE_STACK
Definition: stack_alloc.h:157
float opus_val16
Definition: arch.h:148
void
Definition: AVFoundationCFSoftLinking.h:81
const opus_int16 * bitrev
Definition: kiss_fft.h:94
const kiss_fft_state * kfft[4]
Definition: mdct.h:52
int int * out
Definition: gcc-loops.cpp:206
void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *OPUS_RESTRICT out, const opus_val16 *OPUS_RESTRICT window, int overlap, int shift, int stride, int arch)
Definition: mdct_mipsr1.h:186
Various math functions.
#define kiss_fft_scalar
Definition: kiss_fft.h:59
void shift(ExecState *exec, JSObject *thisObj, unsigned header, unsigned currentCount, unsigned resultCount, unsigned length)
Definition: ArrayPrototype.cpp:291
#define ALLOC(var, size, type)
Definition: stack_alloc.h:156
#define N
Definition: gcc-loops.cpp:14
string arch
Definition: runtests.py:65
GLfloat f
Definition: gl2.h:417
EGLSurface EGLNativeWindowType * window
Definition: eglext.h:934
Definition: kiss_fft.h:65
#define VARDECL(type, var)
Definition: stack_alloc.h:155
int n
Definition: mdct.h:50
Temporary memory allocation on stack.
#define PSHR32(a, shift)
Definition: arch.h:195
for i
Definition: complexityMeasures.m:24
#define MULT16_32_Q16(a, b)
Definition: arch.h:217
const kiss_twiddle_scalar *OPUS_RESTRICT trig
Definition: mdct.h:53
Definition: mdct.h:49
const T1 const T2 & f2
Definition: gtest-tuple.h:658
EGLImageKHR EGLint EGLint EGLint * stride
Definition: eglext.h:851
kiss_fft_scalar r
Definition: kiss_fft.h:66
#define kiss_twiddle_scalar
Definition: kiss_fft.h:60
void opus_fft_impl(const kiss_fft_state *st, kiss_fft_cpx *fout)
Definition: kiss_fft.c:521