diff --git a/INSTALL b/INSTALL
index a2ed2dcfc..dfde4ba82 100644
--- a/INSTALL
+++ b/INSTALL
@@ -9,6 +9,7 @@ GSL Shell needs the following external libraries:
 - GSL Library, version 1.14 or 1.15
 - GNU readline library (on Linux only)
 - Freetype2 library, version 2.4.10
+- FFTW shared library, version 3.3+
 
 To build the Graphical user interface you will need also the FOX 1.6 library.
 
diff --git a/Makefile b/Makefile
index 3a3c2cb3f..bca4113f4 100644
--- a/Makefile
+++ b/Makefile
@@ -58,7 +58,7 @@ ifeq ($(strip $(USE_READLINE)),yes)
   C_SRC_FILES += completion.c
 endif
 
-LUA_BASE_FILES = bspline.lua fft-init.lua integ-init.lua template.lua check.lua \
+LUA_BASE_FILES = bspline.lua fft.lua fftw3/init.lua fftw3/cdefs.lua fftw3/defines.lua integ-init.lua template.lua check.lua \
 	graph-init.lua rng.lua rnd.lua randist.lua iter.lua time.lua gsl-check.lua linfit.lua \
 	roots.lua contour.lua gsl.lua matrix.lua csv.lua gslext.lua num.lua demo-init.lua \
 	import.lua plot3d.lua sf.lua vegas.lua eigen.lua help.lua cgdt.lua expr-actions.lua \
diff --git a/demos/fft.lua b/demos/fft.lua
index 3c0f189e0..f4c9ab55b 100644
--- a/demos/fft.lua
+++ b/demos/fft.lua
@@ -30,13 +30,13 @@ local function demo1()
 
    pt:addline(filine(|i| sq[i], n), 'black')
 
-   local ft = fft(sq)
+   local ft = rfft(sq)
 
-   local pf = fibars(|k| complex.abs(ft[k]), 0, n/2, 'black')
+   local pf = fibars(|k| complex.abs(ft[k]), 1, n/2+1, 'black')
    pf.title = 'FFT Power Spectrum'
 
-   for k=ncut, n - ncut do ft[k] = 0 end
-   sqt = fftinv(ft)
+   for k=ncut, n/2+1 do ft[k] = 0 end
+   local sqt = rfftinv(ft)/n
 
    pt:addline(filine(|i| sqt[i], n), 'red')
    pt:show()
@@ -54,14 +54,14 @@ local function demo2()
 
    pt:addline(filine(|i| sq[i], n), 'black')
 
-   ft = fft(sq, true)
+   ft = rfft(sq)
 
-   pf:add(ibars(iter.isample(|k| complex.abs(ft[k]), 0, n/2)), 'black')
+   pf:add(ibars(iter.isample(|k| complex.abs(ft[k]), 1, #ft)), 'black')
 
-   for k=ncut, n - ncut do ft[k] = 0 end
-   fftinv(ft, true)
+   for k=ncut, #ft do ft[k] = 0 end
+   local sqt = rfftinv(ft)/n
 
-   pt:addline(filine(|i| sq[i], n), 'red')
+   pt:addline(filine(|i| sqt[i], n), 'red')
 
    pf:show()
    pt:show()
@@ -79,15 +79,15 @@ local function demo3()
    local p = plot('Original signal / reconstructed')
    p:addline(filine(|i| bess[i], n), 'black')
 
-   local ft = fft(bess)
+   local ft = rfft(bess)
 
    fftplot = plot('FFT power spectrum')
-   bars = ibars(iter.isample(|k| complex.abs(ft[k]), 0, 60))
+   bars = ibars(iter.isample(|k| complex.abs(ft[k]), 1, 60))
    fftplot:add(bars, 'black')
    fftplot:show()
 
-   for k=ncut, n/2 do ft[k] = 0 end
-   local bessr = fftinv(ft)
+   for k=ncut, #ft do ft[k] = 0 end
+   local bessr = rfftinv(ft)/n
 
    p:addline(filine(|i| bessr[i], n), 'red', {{'dash', 7, 3}})
    p:show()
diff --git a/doc/gsl-shell-index/authors.rst b/doc/gsl-shell-index/authors.rst
index 04fc660b7..c3e5a0cbf 100644
--- a/doc/gsl-shell-index/authors.rst
+++ b/doc/gsl-shell-index/authors.rst
@@ -8,6 +8,6 @@ The main author of GSL Shell is **Francesco Abbate**. He was so fearless that he
 
 The VEGAS algorithm implementation is from **Lesley De Cruz**. Since he joined the project the author feels less lonely and knows that the user base of GSL Shell is of at least two people (including the authors).
 
-The new implementation of the Special Function and Eigensystem modules are from **Benjamin von Ardenne**. The author suspect it is a pseudonym of Lesley so that the author have the impression of having a huge user base (three users).
+The new implementation of the Special Function, Eigensystem and Fast Fourier Transformation modules are contributed by **Benjamin von Ardenne**. The author suspect it is a pseudonym of Lesley so that the author have the impression of having a huge user base (three users).
 
 The smart auto-complete function is based on the work of **Jesus Romero Hebrero**. Desperate about the bad English of Francesco he is currently striving to learn Italian and works on an integrated editor in the spare time.
diff --git a/doc/user-manual/fft.rst b/doc/user-manual/fft.rst
index 5bdce72ff..3d5694757 100644
--- a/doc/user-manual/fft.rst
+++ b/doc/user-manual/fft.rst
@@ -31,106 +31,96 @@ and the definition of the inverse Fourier transform is,
 
 The factor of 1/n makes this a true inverse.
 
-In general there are two possible choices for the sign of the exponential in the transform/ inverse-transform pair.
-GSL follows the same convention as fftpack, using a negative exponential for the forward transform.
+In general there are two possible choices for the sign of the exponential in the transform/ inverse-transform pair. GSL follows the same convention as FFTW, using a negative exponential for the forward transform.
 The advantage of this convention is that the inverse transform recreates the original function with simple Fourier synthesis.
 Numerical Recipes uses the opposite convention, a positive exponential in the forward transform.
 
-GSL Shell interface
--------------------
 
-GSL Shell provide a simple interface to perform Fourier transforms of real data with the functions :func:`num.fft` and :func:`num.fftinv`.
-The first function performs the Fourier transform of a column matrix and the second is the inverse Fourier transform.
-
-The function :func:`num.fft` returns a half-complex array.
-This latter is similar to a column matrix of complex numbers, but it is actually a different object because the numbers are packed together following some specific rules related to the algorithm.
+Methods
+------------------------------
 
-The idea is that you can access the elements of this vector for reading or writing simply by indexing it.
-You can also obtain the size of the vector using the operator '#'.
-The valid indices for a half-complex object range from 0 to N-1 where N is the size if the vector.
-Each element of the vector corresponds to the coefficient :math:`z_k` defined above.
+The FFT implementation in gsl-shell is an interface to the `FFTW library <http://www.fftw.org/fftw3_doc/>`_ which is well tested and very fast for repetitive transformations.
 
-When performing Fourier transforms, it is important to know that the computation speed can be greatly influenced by the size of the vector. If the size is a power of two, a very efficient algorithm can be used and we can talk in this case of a Fast Fourier Transform (FFT). In addition, the algorithm has the advantage that it does not require any additional workspace. When the size of the vector is not a power of two, we can have two different cases:
+.. function:: fft(input [, output])
 
- * the size is a product of small prime numbers
- * the size contains a big (> 7) prime number in its factorization
+   Performs the Fourier transform of the complex-valued column matrix ``input``.
+   The transformed signal is stored in a complex-valued column matrix ``output`` which is allocated by the function each time. To speed up repetitive Fourier transformations it is recommended to give the output column matrix as a second argument to eliminate the allocation overhead. 
 
-This detail is important because if the size is a product of small prime numbers, a fast algorithm is still available but it is still somewhat slower and it does require some additional workspace.
-In the worst case when the size cannot be factorized to small prime numbers, the Fourier transform can still be computed but the calculation is slower, especially for large arrays.
+.. function:: fftinv(input [, output])
 
-GSL Shell hides all the details and takes care of choosing the appropriate algorithm based on the size of the vector.
-It also transparently provides any additional workspace that may be needed for the algorithm.
-In order to avoid repeated allocation of workspace memory, the workspace allocated is kept in memory and reused *if the size of the array does not change*.
-This means that the approach of GSL Shell is quite optimal if you perform many Fourier transforms (direct or inverse) of the same size.
+   Performas the inverse Fourier transformation of the complex-valued column matrix ``input``. The output is stored in a complex-valued column matrix which is allocated by the function each time. To speed up repetitive Fourier transformations it is recommended to give the output column matrix as a second argument to eliminate the allocation overhead.
 
-Even though GSL Shell takes care of the details automatically, you should be aware of these performance notices because it can make a big difference in real applications.
-From a practical point of view, it is useful in most cases to always provide samples whose size is a power of two.
+   This transformation is the inverse of the function :func:`num.fft`, so that if you perform the two transformations consecutively you will obtain a vector identical to the initial one up to a factor ``size=#input``.
 
-Another property of the functions :func:`num.fft` and :func:`num.fftinv` is that they can optionally perform the transformation *in place* by modifying the original data instead of creating a copy.
-When a transformation *in place* is requested, the routine still returns a new vector (either a real matrix or a half-complex array) but this latter will point to the same underlying data of the original vector.
-The transformation *in place* can be useful in some cases to avoid unnecessary data copying and memory allocation.
+   A typical usage of :func:`fftinv` is to revert the transformation made with :func:`fft` but by doing some transformations along the way.
+   So a typical usage path could be::
 
+      -- we assume v is a column matrix with our data
+      ft = num.fft(v) -- Fourier transform
 
-Fourier Transform of Real Data
-------------------------------
+      -- here we can manipulate the half-complex array 'ft'
+      -- using the methods `get' and `set'
+      {some code here}
 
-For real data, the Fourier coefficients satisfy the relation
+      vt = num.fftinv(ft)/#ft -- we perform the inverse Fourier transform
+      -- now vt is a vector of the same size of v
 
-.. math::
-     z_k = z_{N-k}^*
+.. function:: rfft(input[, output])
 
-where N is the size of the vector and k is any integer number from 0 to N-1.
-Because of this relation, the data is packed in a special type of object called a half-complex array.
+    Performs the Fourier transformation of real-valued input and complex-valued output.
+    In the output, the Fourier coefficients satisfy the relation
 
-To access an element in a half-complex array, you can index it with an integer number between 0 and N-1, inclusive. So, for example::
+    .. math::
+         z_k = z_{N-k}^*
 
-   -- get a random number generator
-   r = rng.new()
+    where :math:`N` is the size of the vector and :math:`k` is any integer number from 0 to :math:`N-1`.
+    As a result of this symmetry, half of the output :math:`z` is redundant (being the complex conjugate of the other half), and so the transform only outputs elements :math:`0...\frac{n}{2}` of :math:`z` (:math:`\frac{n}{2}+1` complex numbers), where the division by 2 is rounded down.
 
-   -- create a vector with random numbers
-   x = matrix.new(256, 1, || rnd.gaussian(r, 1))
+.. function:: rfftinv(input[, output])
 
-   -- take the Fourier transform
-   ft = num.fft(x)
+    Performs the inverse Fourier transformation with complex-valued input and real-valued output. Due to the implementation of the FFTW library, changes in the input matrix can occur which is why it is copied internally.
+    For the input of :math:`n`, the output has size :math:`(n-1)\times2`.
+    This is the direct inversion of :func:`num.rfft` as see in the example::
 
-   -- print all the coefficients of the Fourier transform
-   for k=0, #ft-1 do print(ft[k]) end
+        --Forward transformation
+        ft = num.rfft(matrix.vec{1,2,3,4})
 
-As shown in the example above, you can use the Lua operator '#' to obtain the size of a half-complex array.
+        --Backward transformation
+        orig = num.rfftinv(ft) / #ft
 
-.. function:: fft(v[, in_place])
+.. function:: fft2(input, [output])
+.. function:: fft2inv(input, [output])
 
-   Perform the Fourier transform of the real-valued column matrix ``x``.
-   If ``in_place`` is ``true`` then the original data is altered and the resulting vector will point to the same underlying data of the original vector.
+    Performs a 2D forward and backward Fourier transformation of an input matrix ``input``. Giving a preallocated output matrix as a second argument speeds up repetitive transformations.
 
-   Please note that the value you obtain is not an ordinary matrix but a half-complex array.
-   You can access the elements of such an array by indexing the vector.
-   If you want to have an ordinary matrix you can easily build it with the following instructions::
+.. function:: rfft2(input, [output])
+    
+    Performs a 2D forward Fourier transformation with real-valued input matrix ``input``. Returns the complex-valued output with reduced dimension. Giving a preallocated output matrix as a second argument speeds up repetitive transformations.
 
-      -- we suppose that f is an half-complex array
-      m = matrix.cnew(#f, 1, |i,j| f[i-1])
+.. function:: rfft2inv(input, [output])
 
-.. function:: fftinv(hc[, in_place])
+    Performs the 2D inverse Fourier transformation with complex-valued input matrix ``input``. Due to the implementation of the FFTW library, changes in the input matrix can occur which is why it is copied internally.
+    Returns the real-valued output with increased dimension. Giving a preallocated output matrix as a second argument speeds up repetitive transformations.
 
-   Return a column matrix that contains the inverse Fourier transform of the half-complex vector ``hc``.
-   If ``in_place`` is ``true`` then the original data is altered and the resulting vector will point to the same underlying data of the original vector.
+.. function:: fftn(input, dimlist, [output])
+.. function:: fftninv(input, dimlist, [output])
 
-   This transformation is the inverse of the function :func:`num.fft`, so that if you perform the two transformations consecutively you will obtain a vector identical to the initial one.
+    Performs the n-dimensional forward and backward Fourier transformation of input data ``input``.
+    The input is considered to be stored in a `row-major format` meaning that if you had an array with dimensions :math:`n_1 \times n_2 \times n_3` then a point at :math:`(i_1,i_2,i_3)` is stored at index position: :math:`i_3 + n_3\cdot(i_2 + n_2\cdot i_1)`.
+    The dimensions are given as a table ``dimlist`` as :math:`{n1, n2, n3, ...}`.
 
-   A typical usage of :func:`fft_inv` is to revert the transformation made with :func:`fft` but by doing some transformations along the way.
-   So a typical usage path could be::
+    As before, providing the output array as well limits the overhead of allocating the output array each call.
 
-      -- we assume v is a column matrix with our data
-      ft = num.fft(v) -- Fourier transform
+.. function:: rfftn(input, dimlist, [output])
+.. function:: rfftninv(input, dimlist, [output])
+    
+    Performs the n-dimensional real forward and backward Fourier transformation. Here the size of the input and output arrays are similar to the 1D and 2D couterparts.
 
-      -- here we can manipulate the half-complex array 'ft'
-      -- using the methods `get' and `set'
-      some code here
+    For forward transformations the real-valued input has size :math:`n_1 \times n_2 \times ... \times n_N` and the complex-valued output is of size :math:`n_1 \times n_2 \times ... \times n_N/2+1`.
 
-      vt = num.fftinv(ft) -- we perform the inverse Fourier transform
-      -- now vt is a vector of the same size of v
+    For the backward transformation the complex-valued input has size :math:`n_1 \times n_2 \times ... \times n_N/2+1` and the real-valued output has size :math:`n_1 \times n_2 \times ... \times n_N` consequently. Due to the implementation of the FFTW library, changes in the input matrix can occur for backward transformations which is why the input matrix is copied internally.
 
-FFT example
+Examples
 -----------
 
 In this example we will treat a square pulse in the temporal domain. To illustrate a typical example of FFT usage we perform the Fourier Transform of the signal and we cut the higher order frequencies. Then we perform the inverse transform and we compare the result with the original time signal.
@@ -157,13 +147,13 @@ Now we are ready to perform:
 
 and plot the results::
 
-   ft = num.fft(y)
+   ft = num.rfft(y)
 
-   pf = graph.fibars(|k| complex.abs(ft[k]), 0, 60)
+   pf = graph.fibars(|k| complex.abs(ft[k]), 1, 60)
    pf.title = 'FFT Power Spectrum'
 
-   for k=ncut, n/2 do ft[k] = 0 end
-   ytr = num.fftinv(ft)
+   for k=ncut, #ft do ft[k] = 0 end
+   ytr = num.rfftinv(ft)/n
 
    pt:addline(graph.filine(|i| ytr[i], n), 'red')
 
diff --git a/fft-init.lua b/fft-init.lua
deleted file mode 100644
index d7bad58b3..000000000
--- a/fft-init.lua
+++ /dev/null
@@ -1,306 +0,0 @@
-
-local ffi = require 'ffi'
-local bit = require 'bit'
-local gsl = require 'gsl'
-
-local gsl_check = require 'gsl-check'
-
-local check     = require 'check'
-local is_integer = check.is_integer
-
-local tobit, band, rshift = bit.tobit, bit.band, bit.rshift
-local tonumber = tonumber
-
-ffi.cdef [[
-   typedef struct
-   {
-      size_t size;
-      size_t stride;
-      double * data;
-      gsl_block * block;
-   } fft_hc;
-
-   typedef struct
-   {
-      size_t size;
-      size_t stride;
-      double * data;
-      gsl_block * block;
-   } fft_radix2_hc;
-
-   typedef struct
-   {
-      size_t n;
-      size_t nf;
-      size_t factor[64];
-      gsl_complex *twiddle[64];
-      gsl_complex *trig;
-   } gsl_fft_real_wavetable;
-
-   typedef struct
-   {
-      size_t n;
-      double *scratch;
-   } gsl_fft_real_workspace;
-
-   typedef struct
-   {
-      size_t n;
-      size_t nf;
-      size_t factor[64];
-      gsl_complex *twiddle[64];
-      gsl_complex *trig;
-   } gsl_fft_halfcomplex_wavetable;
-
-   gsl_fft_real_wavetable * gsl_fft_real_wavetable_alloc (size_t n);
-
-   void gsl_fft_real_wavetable_free (gsl_fft_real_wavetable * wavetable);
-
-   gsl_fft_halfcomplex_wavetable * gsl_fft_halfcomplex_wavetable_alloc (size_t n);
-
-   void gsl_fft_halfcomplex_wavetable_free (gsl_fft_halfcomplex_wavetable * wavetable);
-
-   gsl_fft_real_workspace * gsl_fft_real_workspace_alloc (size_t n);
-
-   void gsl_fft_real_workspace_free (gsl_fft_real_workspace * workspace);
-
-   int gsl_fft_real_radix2_transform (double data[], const size_t stride,
-                                      const size_t n) ;
-
-   int gsl_fft_halfcomplex_radix2_inverse (double data[],
-                                           size_t stride, size_t n);
-
-   int gsl_fft_real_transform (double data[], const size_t stride, const size_t n,
-                               const gsl_fft_real_wavetable * wavetable,
-                               gsl_fft_real_workspace * work);
-
-   int gsl_fft_halfcomplex_inverse (double data[], const size_t stride, const size_t n,
-                                 const gsl_fft_halfcomplex_wavetable * wavetable,
-                                 gsl_fft_real_workspace * work);
-
-   int gsl_fft_halfcomplex_transform (double data[], const size_t stride, const size_t n,
-                                   const gsl_fft_halfcomplex_wavetable * wavetable,
-                                   gsl_fft_real_workspace * work);
-
-]]
-
-local fft_hc        = ffi.typeof('fft_hc')
-local fft_radix2_hc = ffi.typeof('fft_radix2_hc')
-local gsl_matrix    = ffi.typeof('gsl_matrix')
-
-local function is_two_power(n)
-   if n > 0 then
-      local k = tobit(n)
-      while band(k, 1) == 0 do k = rshift(k, 1) end
-      return (k == 1)
-   end
-end
-
-local cache_n = {}
-local cache_r = {}
-
-local function res_allocator(name)
-   local alloc = gsl['gsl_fft_' .. name .. '_alloc']
-   local free  = gsl['gsl_fft_' .. name .. '_free']
-   return function(n)
-             return ffi.gc(alloc(n), free)
-          end
-end
-
-local cache_allocator = {
-   real_wavetable        = res_allocator('real_wavetable'),
-   halfcomplex_wavetable = res_allocator('halfcomplex_wavetable'),
-   real_workspace        = res_allocator('real_workspace')
-}
-
-local function get_resource(name, n)
-   local resource
-   if cache_n[name] ~= n then
-      resource = cache_allocator[name](n)
-      cache_n[name] = n
-      cache_r[name] = resource
-   else
-      resource = cache_r[name]
-   end
-   return resource
-end
-
-local function get_matrix_block(x, ip)
-   local n = tonumber(x.size1)
-   local b, data, stride
-   if ip then
-      b, data, stride = x.block, x.data, x.tda
-      b.ref_count = b.ref_count + 1
-   else
-      b = matrix.block(n)
-      data, stride = b.data, 1
-      for i=0, n-1 do data[i] = x.data[x.tda * i] end
-   end
-   return b, data, stride
-end
-
-local function get_hc_block(ft, ip)
-   local n = tonumber(ft.size)
-   local b, data, stride
-   if ip then
-      b, data, stride = ft.block, ft.data, tonumber(ft.stride)
-      b.ref_count = b.ref_count + 1
-   else
-      b = matrix.block(n)
-      data, stride = b.data, 1
-      for i=0, n-1 do data[i] = ft.data[tonumber(ft.stride) * i] end
-   end
-   return b, data, stride
-end
-
-function num.fft(x, ip)
-   local n = tonumber(x.size1)
-   local b, data, stride = get_matrix_block(x, ip)
-   if is_two_power(n) then
-      gsl_check(gsl.gsl_fft_real_radix2_transform(data, stride, n))
-      return fft_radix2_hc(n, stride, data, b)
-   else
-      local wt = get_resource('real_wavetable', n)
-      local ws = get_resource('real_workspace', n)
-      gsl_check(gsl.gsl_fft_real_transform(data, stride, n, wt, ws))
-      return fft_hc(n, stride, data, b)
-   end      
-end
-
-function num.fftinv(ft, ip)
-   local n = tonumber(ft.size)
-   local b, data, stride = get_hc_block(ft, ip)
-   if is_two_power(n) then
-      gsl_check(gsl.gsl_fft_halfcomplex_radix2_inverse(data, stride, n))
-   else
-      local wt = get_resource('halfcomplex_wavetable', n)
-      local ws = get_resource('real_workspace', n)
-      gsl_check(gsl.gsl_fft_halfcomplex_inverse(data, stride, n, wt, ws))
-   end
-   return gsl_matrix(n, 1, stride, data, b, 1)
-end
-
-local function halfcomplex_radix2_index(n, stride, k)
-   if k < 0 or k >= n then error('invalid halfcomplex index', 2) end
-   local half_n = n/2
-   if k == 0 then
-      return 0, 0
-   elseif k < half_n then 
-      return 1, k, n-k
-   elseif k == half_n then
-      return 0, half_n
-   elseif k > half_n then
-      return -1, n-k, k
-   end
-end
-
-local function halfcomplex_index(n, stride, k)
-   if k < 0 or k >= n then error('invalid halfcomplex index', 2) end
-   local half_n = n/2
-   if k == 0 then
-      return 0, 0
-   elseif k < half_n then 
-      return 1, 2*k-1, 2*k
-   elseif k == half_n then
-      return 0, half_n
-   elseif k > half_n then
-      return -1, 2*(n-k)-1, 2*(n-k)
-   end
-end
-
-local function halfcomplex_get(indexer, data, n, stride, k)
-   local isign, ridx, iidx = indexer(n, stride, k)
-   local r = data[stride*ridx]
-   local i = (isign == 0 and 0 or isign * data[stride*iidx])
-   return complex.new(r, i)
-end
-
-local function halfcomplex_set(indexer, data, n, stride, k, z)
-   local isign, ridx, iidx = indexer(n, stride, k)
-   local r, i = complex.rect(z)
-   data[stride*ridx] = r
-   if isign ~= 0 then
-      data[stride*iidx] = isign * i
-   end
-end
-
-local function hc_length(ft)
-   return tonumber(ft.size)
-end
-
-local function halfcomplex_to_matrix(hc)
-   return matrix.cnew(tonumber(hc.size), 1, function(i) return hc[i-1] end)
-end
-
-local function hc_tostring(hc)
-   local m = halfcomplex_to_matrix(hc)
-   return m:show()
-end
-
-local function hc_radix2_index(ft, k)
-   if is_integer(k) then
-      local idx = halfcomplex_radix2_index
-      local size, stride = tonumber(ft.size), tonumber(ft.stride)
-      return halfcomplex_get(idx, ft.data, size, stride, k)
-   elseif k == 'show' then
-      return hc_tostring
-   end
-end
-
-local function hc_radix2_newindex(ft, k, z)
-   if is_integer(k) then
-      local idx = halfcomplex_radix2_index
-      local size, stride = tonumber(ft.size), tonumber(ft.stride)
-      return halfcomplex_set(idx, ft.data, size, stride, k, z)
-   end
-end
-
-local function hc_index(ft, k)
-   if is_integer(k) then
-      local idx = halfcomplex_index
-      local size, stride = tonumber(ft.size), tonumber(ft.stride)
-      return halfcomplex_get(idx, ft.data, size, stride, k)
-   elseif k == 'show' then
-      return hc_tostring
-   end
-end
-
-local function hc_newindex(ft, k, z)
-   if is_integer(k) then
-      local idx = halfcomplex_index
-      local size, stride = tonumber(ft.size), tonumber(ft.stride)
-      return halfcomplex_set(idx, ft.data, size, stride, k, z)
-   end
-end
-
-local function hc_free(hc)
-   local b = hc.block
-   b.ref_count = b.ref_count - 1
-   if b.ref_count == 0 then
-      ffi.C.free(b.data)
-      ffi.C.free(b)
-   end
-end
-
-ffi.metatype(fft_hc, {
-                __gc       = hc_free,
-                __index    = hc_index,
-                __newindex = hc_newindex,
-                __len      = hc_length,
---                __tostring = hc_tostring,
-             }
-          )
-
-ffi.metatype(fft_radix2_hc, {
-                __gc       = hc_free,
-                __index    = hc_radix2_index,
-                __newindex = hc_radix2_newindex,
-                __len      = hc_length,
---                __tostring = hc_tostring,
-             }
-          )
-
-local register_ffi_type = debug.getregistry().__gsl_reg_ffi_type
-
-register_ffi_type(fft_radix2_hc, "radix2 half-complex vector")
-register_ffi_type(fft_hc, "half-complex vector")
diff --git a/fft.lua b/fft.lua
new file mode 100644
index 000000000..0d9e3fb3a
--- /dev/null
+++ b/fft.lua
@@ -0,0 +1,293 @@
+local ffi = require'ffi'
+local fftw = require'fftw3.init'
+local gsl_matrix = ffi.typeof('gsl_matrix')
+local gsl_matrix_complex = ffi.typeof('gsl_matrix_complex')
+
+local FORWARD = 1
+local BACKWARD = 2
+
+
+local function compare_dimensions(dim1, dim2)
+	for i = 1,#dim1 do
+		if dim1[i] ~= dim2[i] then
+			return false
+		end
+	end
+	return true
+end
+
+local dftplans = {}
+local function dft(invec, dimlist, outvec, direction)
+
+	if ffi.istype(gsl_matrix_complex, invec) then
+		local rank = #dimlist
+		local size = 1
+		for i=1,rank do size = size * dimlist[i] end
+		local direction = direction or FORWARD
+		local output_supplied = true
+
+		--First check if output vector is given by the user, otherwise allocate it
+		if not outvec then
+			output_supplied = false
+			outvec = matrix.calloc(size, 1)
+		end
+
+		local output = ffi.cast("fftw_complex*",outvec.data)
+		local input = ffi.cast("fftw_complex*",invec.data)
+
+		--Check if plan is existing, then use it
+		if dftplans[rank] ~= nil and dftplans[rank][direction] ~= nil and
+		   compare_dimensions(dimlist, dftplans[rank][direction].dimlist) and
+		   invec.tda == dftplans[rank][direction].itda and
+		   outvec.tda == dftplans[rank][direction].otda then
+
+			local plan = dftplans[rank][direction].plan
+			fftw.execute_dft(plan, input, output)
+			if not output_supplied then return outvec end
+		--Or create a new plan
+		else
+			--allcoate input for plan making but planning can invalidate input, therefore we use extra array
+			local inputtest = ffi.new("fftw_complex[?]", size * invec.tda)
+
+			local howmany = 1
+			local idist,odist = 0,0
+			local istride = invec.tda
+			local ostride = outvec.tda
+			local dim = ffi.new("int[?]", rank, dimlist)
+			local plan = ffi.gc(fftw.plan_many_dft(rank, dim ,howmany,
+											inputtest,nil, istride, idist,
+											output,nil,ostride,odist,
+											direction == FORWARD and fftw.FORWARD or fftw.BACKWARD, bit.bor(fftw.MEASURE, fftw.UNALIGNED)), fftw.destroy_plan)
+
+			--Save the plan for next time
+			if dftplans[rank] == nil then dftplans[rank] = {} end
+			dftplans[rank][direction] = {plan=plan, dimlist=dimlist, itda=invec.tda, otda=outvec.tda}
+
+			--Execute the plan with the supplied input
+			fftw.execute_dft(plan, input, output)
+			if not output_supplied then return outvec end
+		end
+	else
+		error("Input must be complex valued.")
+	end
+end
+
+local rdftplans = {}
+local function rdft(invec, dimlist, outvec)
+
+	if ffi.istype(gsl_matrix, invec) then
+		local rank = #dimlist
+		local size = 1
+		for i=1,rank do size = size * dimlist[i] end
+		local outputsize = size/dimlist[rank]*(math.floor(dimlist[rank]/2)+1)
+
+		local direction = FORWARD
+		local output_supplied = true
+
+		--First check if output vector is given by the user, otherwise allocate it
+		if not outvec then
+			output_supplied = false
+			outvec = matrix.calloc(outputsize, 1)
+		end
+
+		local output = ffi.cast("fftw_complex*",outvec.data)
+		local input = invec.data
+
+		--Check if plan is existing, then use it
+		if rdftplans[rank] ~= nil and rdftplans[rank][direction] ~= nil and
+		   compare_dimensions(dimlist, rdftplans[rank][direction].dimlist) and
+		   invec.tda == rdftplans[rank][direction].itda and
+		   outvec.tda == rdftplans[rank][direction].otda then
+
+			local plan = rdftplans[rank][direction].plan
+			fftw.execute_dft_r2c(plan, input, output)
+			if not output_supplied then return outvec end
+		--Or create a new plan
+		else
+			--allcoate input for plan making but planning can invalidate input, therefore we use extra array
+            local inputtest = ffi.new("double[?]", size * invec.tda)
+
+			local howmany = 1
+			local idist,odist = 0,0
+			local istride = invec.tda
+			local ostride = outvec.tda
+			local dim = ffi.new("int[?]", rank, dimlist)
+			local plan = ffi.gc(fftw.plan_many_dft_r2c(rank, dim, howmany,
+											 inputtest,nil,istride, idist,
+											 output,nil, ostride, odist,
+											 bit.bor(fftw.MEASURE, fftw.UNALIGNED)), fftw.destroy_plan)
+
+			--Save the plan for next time
+			if rdftplans[rank] == nil then rdftplans[rank] = {} end
+			rdftplans[rank][direction] = {plan=plan, dimlist=dimlist, itda=invec.tda, otda=outvec.tda}
+			--Execute the plan with the supplied input
+			fftw.execute_dft_r2c(plan, input, output)
+			if not output_supplied then return outvec end
+		end
+	else
+		error("Input must be real valued.")
+	end
+end
+
+local function rdftinv(invec, dimlist, outvec)
+
+	if ffi.istype(gsl_matrix_complex, invec) then
+		local rank = #dimlist
+		local size = 1
+		for i=1,rank do size = size * dimlist[i] end
+		local inputsize = size/dimlist[rank]*(math.floor(dimlist[rank]/2)+1)
+
+		local direction = BACKWARD
+		local output_supplied = true
+
+		--First check if output vector is given by the user, otherwise allocate it
+		if not outvec then
+			output_supplied = false
+			outvec = matrix.alloc(size, 1)
+		end
+
+		local output = outvec.data
+		
+		--[[Copying over the input because according to FFTW docs:
+		"Second, the inverse transform (complex to real) has the side-effect of overwriting its input array,
+		by default. Neither of these inconveniences should pose a serious problem for users,
+		but it is important to be aware of them."
+
+		and also
+
+		"As noted above, the c2r transform destroys its input array even for out-of-place transforms.
+		This can be prevented, if necessary, by including FFTW_PRESERVE_INPUT in the flags,
+		with unfortunately some sacrifice in performance.
+		This flag is also not currently supported for multi-dimensional real DFTs (next section)."
+
+		For reason of generality here we always copy over the input for the c2r transformation.
+		]]--
+		local input = ffi.new("fftw_complex[?]",size)
+		ffi.copy(input, invec.data, size*ffi.sizeof("fftw_complex"))
+
+		--Check if plan is existing, then use it
+		if rdftplans[rank] ~= nil and rdftplans[rank][direction] ~= nil and
+		   compare_dimensions(dimlist, rdftplans[rank][direction].dimlist) and
+		   invec.tda == rdftplans[rank][direction].itda and
+		   outvec.tda == rdftplans[rank][direction].otda then
+
+			local plan = rdftplans[rank][direction].plan
+			fftw.execute_dft_c2r(plan, input, output)
+			if not output_supplied then return outvec end
+		--Or create a new plan
+		else
+			--allcoate input for plan making but planning can invalidate input, therefore we use extra array
+			local inputtest = ffi.new("fftw_complex[?]", inputsize * invec.tda)
+
+			local howmany = 1
+			local idist,odist = 0,0
+			local istride = invec.tda
+			local ostride = outvec.tda
+			local dim = ffi.new("int[?]", rank, dimlist)
+			local plan = ffi.gc(fftw.plan_many_dft_c2r(rank, dim, howmany,
+											inputtest, nil, istride, idist,
+											output, nil, ostride, odist,
+											bit.bor(fftw.MEASURE, fftw.UNALIGNED)), fftw.destroy_plan)
+
+			--Save the plan for next time
+			if rdftplans[rank] == nil then rdftplans[rank] = {} end
+			rdftplans[rank][direction] = {plan=plan, dimlist=dimlist, itda=invec.tda, otda=outvec.tda}
+			--Execute the plan with the supplied input
+			fftw.execute_dft_c2r(plan, input, output)
+			if not output_supplied then return outvec end
+		end
+	else
+		error("Input must be complex valued.")
+	end
+end
+
+------------------------------------------------
+
+function num.fft(invec, outvec)
+	return dft(invec, {#invec}, outvec, FORWARD)
+end
+
+function num.fftinv(invec, outvec)
+	return dft(invec, {#invec}, outvec, BACKWARD)
+end
+
+function num.rfft(invec, outvec)
+	return rdft(invec, {#invec}, outvec)
+end
+
+function num.rfftinv(invec, outvec)
+	return rdftinv(invec, {(#invec-1)*2}, outvec)
+end
+
+function num.fft2(inmat, outmat)
+	local n1 = tonumber(inmat.size1)
+	local n2 = tonumber(inmat.size2)
+
+	if outmat == nil then
+		local retvec = dft(inmat, {n1, n2}, outmat, FORWARD)
+		local b = ffi.cast('gsl_block_complex*', ffi.C.malloc(ffi.sizeof('gsl_block_complex')))
+	   	b.size, b.data, b.ref_count = n1*n2, retvec.data, 2
+	   	return gsl_matrix_complex(n1, n2, n2, b.data, b, 1)
+	else
+		dft(inmat, {n1, n2}, outmat, FORWARD)
+	end
+end
+
+function num.fft2inv(inmat, outmat)
+	local n1 = tonumber(inmat.size1)
+	local n2 = tonumber(inmat.size2)
+
+	if outmat == nil then
+		local retvec = dft(inmat, {n1, n2}, outmat, BACKWARD)
+		local b = ffi.cast('gsl_block_complex*', ffi.C.malloc(ffi.sizeof('gsl_block_complex')))
+	   	b.size, b.data, b.ref_count = n1*n2, retvec.data, 2
+	   	return gsl_matrix_complex(n1, n2, n2, b.data, b, 1)
+	else
+		dft(inmat, {n1, n2}, outmat, BACKWARD)
+	end
+end
+
+function num.rfft2(inmat, outmat)
+	local n1 = tonumber(inmat.size1)
+	local n2 = tonumber(inmat.size2)
+
+	if outmat == nil then
+		local retvec = rdft(inmat, {n1, n2}, outmat)
+		local b = ffi.cast('gsl_block_complex*', ffi.C.malloc(ffi.sizeof('gsl_block_complex')))
+		local n2new = math.floor(n2/2)+1
+	   	b.size, b.data, b.ref_count = n1*n2new, retvec.data, 2
+	   	return gsl_matrix_complex(n1, n2new, n2new, b.data, b, 1)
+	else
+		rdft(inmat, {n1, n2}, outmat)
+	end
+end
+
+function num.rfft2inv(inmat, outmat)
+	local n1 = tonumber(inmat.size1)
+	local n2 = (tonumber(inmat.size2)-1)*2
+
+	if outmat == nil then
+		local retvec = rdftinv(inmat, {n1, n2}, outmat)
+		local b = ffi.cast('gsl_block*', ffi.C.malloc(ffi.sizeof('gsl_block')))
+	   	b.size, b.data, b.ref_count = n1*n2, retvec.data, 2
+	   	return gsl_matrix(n1, n2, n2, b.data, b, 1)
+	else
+		rdft(inmat, {n1, n2}, outmat)
+	end
+end
+
+function num.fftn(invec, dimlist, outvec)
+	return dft(invec, dimlist, outvec, FORWARD)
+end
+
+function num.fftninv(invec, dimlist, outvec)
+	return dft(invec, dimlist, outvec, BACKWARD)
+end
+
+function num.rfftn(invec, dimlist, outvec)
+	return rdft(invec, dimlist, outvec)
+end
+
+function num.rfftninv(invec, dimlist, outvec)
+	return rdftinv(invec, dimlist, outvec)
+end
diff --git a/fftw3/cdefs.lua b/fftw3/cdefs.lua
new file mode 100644
index 000000000..e1512f151
--- /dev/null
+++ b/fftw3/cdefs.lua
@@ -0,0 +1,346 @@
+-- Cut and paste from the C preprocessor output
+-- Removed inline/defined functions which are not supported by luajit
+-- Instead, those are defined into defines.lua
+-- Note there are some tests here and there to stay cross-platform
+
+local ffi = require 'ffi'
+
+ffi.cdef[[
+typedef struct _FILE FILE;
+typedef long double __float128;
+]]
+
+
+ffi.cdef[[
+
+enum fftw_r2r_kind_do_not_use_me {
+     FFTW_R2HC=0, FFTW_HC2R=1, FFTW_DHT=2,
+     FFTW_REDFT00=3, FFTW_REDFT01=4, FFTW_REDFT10=5, FFTW_REDFT11=6,
+     FFTW_RODFT00=7, FFTW_RODFT01=8, FFTW_RODFT10=9, FFTW_RODFT11=10
+};
+
+struct fftw_iodim_do_not_use_me {
+     int n;
+     int is;
+     int os;
+};
+
+struct fftw_iodim64_do_not_use_me {
+     ptrdiff_t n;
+     ptrdiff_t is;
+     ptrdiff_t os;
+};
+
+typedef void (*fftw_write_char_func_do_not_use_me)(char c, void *);
+typedef int (*fftw_read_char_func_do_not_use_me)(void *);
+
+typedef double fftw_complex[2]; 
+typedef struct fftw_plan_s *fftw_plan; 
+typedef struct fftw_iodim_do_not_use_me fftw_iodim; 
+typedef struct fftw_iodim64_do_not_use_me fftw_iodim64; 
+typedef enum fftw_r2r_kind_do_not_use_me fftw_r2r_kind; 
+typedef fftw_write_char_func_do_not_use_me fftw_write_char_func; 
+typedef fftw_read_char_func_do_not_use_me fftw_read_char_func; 
+extern void fftw_execute(const fftw_plan p); 
+extern fftw_plan fftw_plan_dft(int rank, const int *n, fftw_complex *in, fftw_complex *out, int sign, unsigned flags); 
+extern fftw_plan fftw_plan_dft_1d(int n, fftw_complex *in, fftw_complex *out, int sign, unsigned flags); 
+extern fftw_plan fftw_plan_dft_2d(int n0, int n1, fftw_complex *in, fftw_complex *out, int sign, unsigned flags); 
+extern fftw_plan fftw_plan_dft_3d(int n0, int n1, int n2, fftw_complex *in, fftw_complex *out, int sign, unsigned flags); 
+extern fftw_plan fftw_plan_many_dft(int rank, const int *n, int howmany, fftw_complex *in, const int *inembed, int istride, int idist, fftw_complex *out, const int *onembed, int ostride, int odist, int sign, unsigned flags); 
+extern fftw_plan fftw_plan_guru_dft(int rank, const fftw_iodim *dims, int howmany_rank, const fftw_iodim *howmany_dims, fftw_complex *in, fftw_complex *out, int sign, unsigned flags); 
+extern fftw_plan fftw_plan_guru_split_dft(int rank, const fftw_iodim *dims, int howmany_rank, const fftw_iodim *howmany_dims, double *ri, double *ii, double *ro, double *io, unsigned flags); 
+extern fftw_plan fftw_plan_guru64_dft(int rank, const fftw_iodim64 *dims, int howmany_rank, const fftw_iodim64 *howmany_dims, fftw_complex *in, fftw_complex *out, int sign, unsigned flags); 
+extern fftw_plan fftw_plan_guru64_split_dft(int rank, const fftw_iodim64 *dims, int howmany_rank, const fftw_iodim64 *howmany_dims, double *ri, double *ii, double *ro, double *io, unsigned flags); 
+extern void fftw_execute_dft(const fftw_plan p, fftw_complex *in, fftw_complex *out); 
+extern void fftw_execute_split_dft(const fftw_plan p, double *ri, double *ii, double *ro, double *io); 
+extern fftw_plan fftw_plan_many_dft_r2c(int rank, const int *n, int howmany, double *in, const int *inembed, int istride, int idist, fftw_complex *out, const int *onembed, int ostride, int odist, unsigned flags); 
+extern fftw_plan fftw_plan_dft_r2c(int rank, const int *n, double *in, fftw_complex *out, unsigned flags); 
+extern fftw_plan fftw_plan_dft_r2c_1d(int n,double *in,fftw_complex *out,unsigned flags); 
+extern fftw_plan fftw_plan_dft_r2c_2d(int n0, int n1, double *in, fftw_complex *out, unsigned flags); 
+extern fftw_plan fftw_plan_dft_r2c_3d(int n0, int n1, int n2, double *in, fftw_complex *out, unsigned flags); 
+extern fftw_plan fftw_plan_many_dft_c2r(int rank, const int *n, int howmany, fftw_complex *in, const int *inembed, int istride, int idist, double *out, const int *onembed, int ostride, int odist, unsigned flags); 
+extern fftw_plan fftw_plan_dft_c2r(int rank, const int *n, fftw_complex *in, double *out, unsigned flags); 
+extern fftw_plan fftw_plan_dft_c2r_1d(int n,fftw_complex *in,double *out,unsigned flags); 
+extern fftw_plan fftw_plan_dft_c2r_2d(int n0, int n1, fftw_complex *in, double *out, unsigned flags); 
+extern fftw_plan fftw_plan_dft_c2r_3d(int n0, int n1, int n2, fftw_complex *in, double *out, unsigned flags); 
+extern fftw_plan fftw_plan_guru_dft_r2c(int rank, const fftw_iodim *dims, int howmany_rank, const fftw_iodim *howmany_dims, double *in, fftw_complex *out, unsigned flags); 
+extern fftw_plan fftw_plan_guru_dft_c2r(int rank, const fftw_iodim *dims, int howmany_rank, const fftw_iodim *howmany_dims, fftw_complex *in, double *out, unsigned flags); 
+extern fftw_plan fftw_plan_guru_split_dft_r2c( int rank, const fftw_iodim *dims, int howmany_rank, const fftw_iodim *howmany_dims, double *in, double *ro, double *io, unsigned flags); 
+extern fftw_plan fftw_plan_guru_split_dft_c2r( int rank, const fftw_iodim *dims, int howmany_rank, const fftw_iodim *howmany_dims, double *ri, double *ii, double *out, unsigned flags); 
+extern fftw_plan fftw_plan_guru64_dft_r2c(int rank, const fftw_iodim64 *dims, int howmany_rank, const fftw_iodim64 *howmany_dims, double *in, fftw_complex *out, unsigned flags); 
+extern fftw_plan fftw_plan_guru64_dft_c2r(int rank, const fftw_iodim64 *dims, int howmany_rank, const fftw_iodim64 *howmany_dims, fftw_complex *in, double *out, unsigned flags); 
+extern fftw_plan fftw_plan_guru64_split_dft_r2c( int rank, const fftw_iodim64 *dims, int howmany_rank, const fftw_iodim64 *howmany_dims, double *in, double *ro, double *io, unsigned flags); 
+extern fftw_plan fftw_plan_guru64_split_dft_c2r( int rank, const fftw_iodim64 *dims, int howmany_rank, const fftw_iodim64 *howmany_dims, double *ri, double *ii, double *out, unsigned flags); 
+extern void fftw_execute_dft_r2c(const fftw_plan p, double *in, fftw_complex *out); 
+extern void fftw_execute_dft_c2r(const fftw_plan p, fftw_complex *in, double *out); 
+extern void fftw_execute_split_dft_r2c(const fftw_plan p, double *in, double *ro, double *io); 
+extern void fftw_execute_split_dft_c2r(const fftw_plan p, double *ri, double *ii, double *out); 
+extern fftw_plan fftw_plan_many_r2r(int rank, const int *n, int howmany, double *in, const int *inembed, int istride, int idist, double *out, const int *onembed, int ostride, int odist, const fftw_r2r_kind *kind, unsigned flags); 
+extern fftw_plan fftw_plan_r2r(int rank, const int *n, double *in, double *out, const fftw_r2r_kind *kind, unsigned flags); 
+extern fftw_plan fftw_plan_r2r_1d(int n, double *in, double *out, fftw_r2r_kind kind, unsigned flags); 
+extern fftw_plan fftw_plan_r2r_2d(int n0, int n1, double *in, double *out, fftw_r2r_kind kind0, fftw_r2r_kind kind1, unsigned flags); 
+extern fftw_plan fftw_plan_r2r_3d(int n0, int n1, int n2, double *in, double *out, fftw_r2r_kind kind0, fftw_r2r_kind kind1, fftw_r2r_kind kind2, unsigned flags); 
+extern fftw_plan fftw_plan_guru_r2r(int rank, const fftw_iodim *dims, int howmany_rank, const fftw_iodim *howmany_dims, double *in, double *out, const fftw_r2r_kind *kind, unsigned flags); 
+extern fftw_plan fftw_plan_guru64_r2r(int rank, const fftw_iodim64 *dims, int howmany_rank, const fftw_iodim64 *howmany_dims, double *in, double *out, const fftw_r2r_kind *kind, unsigned flags); 
+extern void fftw_execute_r2r(const fftw_plan p, double *in, double *out); 
+extern void fftw_destroy_plan(fftw_plan p); 
+extern void fftw_forget_wisdom(void); 
+extern void fftw_cleanup(void); 
+extern void fftw_set_timelimit(double t); 
+extern void fftw_plan_with_nthreads(int nthreads); 
+extern int fftw_init_threads(void); 
+extern void fftw_cleanup_threads(void); 
+extern int fftw_export_wisdom_to_filename(const char *filename); 
+extern void fftw_export_wisdom_to_file(FILE *output_file); 
+extern char *fftw_export_wisdom_to_string(void); 
+extern void fftw_export_wisdom(fftw_write_char_func write_char, void *data); 
+extern int fftw_import_system_wisdom(void); 
+extern int fftw_import_wisdom_from_filename(const char *filename); 
+extern int fftw_import_wisdom_from_file(FILE *input_file); 
+extern int fftw_import_wisdom_from_string(const char *input_string); 
+extern int fftw_import_wisdom(fftw_read_char_func read_char, void *data); 
+extern void fftw_fprint_plan(const fftw_plan p, FILE *output_file); 
+extern void fftw_print_plan(const fftw_plan p); 
+extern void *fftw_malloc(size_t n); 
+extern double *fftw_alloc_real(size_t n); 
+extern fftw_complex *fftw_alloc_complex(size_t n); 
+extern void fftw_free(void *p); 
+extern void fftw_flops(const fftw_plan p, double *add, double *mul, double *fmas); 
+extern double fftw_estimate_cost(const fftw_plan p); 
+extern double fftw_cost(const fftw_plan p); 
+extern const char fftw_version[]; 
+extern const char fftw_cc[]; 
+extern const char fftw_codelet_optim[];
+typedef float fftwf_complex[2]; 
+typedef struct fftwf_plan_s *fftwf_plan; 
+typedef struct fftw_iodim_do_not_use_me fftwf_iodim; 
+typedef struct fftw_iodim64_do_not_use_me fftwf_iodim64; 
+typedef enum fftw_r2r_kind_do_not_use_me fftwf_r2r_kind; 
+typedef fftw_write_char_func_do_not_use_me fftwf_write_char_func; 
+typedef fftw_read_char_func_do_not_use_me fftwf_read_char_func; 
+extern void fftwf_execute(const fftwf_plan p); 
+extern fftwf_plan fftwf_plan_dft(int rank, const int *n, fftwf_complex *in, fftwf_complex *out, int sign, unsigned flags); 
+extern fftwf_plan fftwf_plan_dft_1d(int n, fftwf_complex *in, fftwf_complex *out, int sign, unsigned flags); 
+extern fftwf_plan fftwf_plan_dft_2d(int n0, int n1, fftwf_complex *in, fftwf_complex *out, int sign, unsigned flags); 
+extern fftwf_plan fftwf_plan_dft_3d(int n0, int n1, int n2, fftwf_complex *in, fftwf_complex *out, int sign, unsigned flags); 
+extern fftwf_plan fftwf_plan_many_dft(int rank, const int *n, int howmany, fftwf_complex *in, const int *inembed, int istride, int idist, fftwf_complex *out, const int *onembed, int ostride, int odist, int sign, unsigned flags); 
+extern fftwf_plan fftwf_plan_guru_dft(int rank, const fftwf_iodim *dims, int howmany_rank, const fftwf_iodim *howmany_dims, fftwf_complex *in, fftwf_complex *out, int sign, unsigned flags); 
+extern fftwf_plan fftwf_plan_guru_split_dft(int rank, const fftwf_iodim *dims, int howmany_rank, const fftwf_iodim *howmany_dims, float *ri, float *ii, float *ro, float *io, unsigned flags); 
+extern fftwf_plan fftwf_plan_guru64_dft(int rank, const fftwf_iodim64 *dims, int howmany_rank, const fftwf_iodim64 *howmany_dims, fftwf_complex *in, fftwf_complex *out, int sign, unsigned flags); 
+extern fftwf_plan fftwf_plan_guru64_split_dft(int rank, const fftwf_iodim64 *dims, int howmany_rank, const fftwf_iodim64 *howmany_dims, float *ri, float *ii, float *ro, float *io, unsigned flags); 
+extern void fftwf_execute_dft(const fftwf_plan p, fftwf_complex *in, fftwf_complex *out); 
+extern void fftwf_execute_split_dft(const fftwf_plan p, float *ri, float *ii, float *ro, float *io); 
+extern fftwf_plan fftwf_plan_many_dft_r2c(int rank, const int *n, int howmany, float *in, const int *inembed, int istride, int idist, fftwf_complex *out, const int *onembed, int ostride, int odist, unsigned flags); 
+extern fftwf_plan fftwf_plan_dft_r2c(int rank, const int *n, float *in, fftwf_complex *out, unsigned flags); 
+extern fftwf_plan fftwf_plan_dft_r2c_1d(int n,float *in,fftwf_complex *out,unsigned flags); 
+extern fftwf_plan fftwf_plan_dft_r2c_2d(int n0, int n1, float *in, fftwf_complex *out, unsigned flags); 
+extern fftwf_plan fftwf_plan_dft_r2c_3d(int n0, int n1, int n2, float *in, fftwf_complex *out, unsigned flags); 
+extern fftwf_plan fftwf_plan_many_dft_c2r(int rank, const int *n, int howmany, fftwf_complex *in, const int *inembed, int istride, int idist, float *out, const int *onembed, int ostride, int odist, unsigned flags); 
+extern fftwf_plan fftwf_plan_dft_c2r(int rank, const int *n, fftwf_complex *in, float *out, unsigned flags); 
+extern fftwf_plan fftwf_plan_dft_c2r_1d(int n,fftwf_complex *in,float *out,unsigned flags); 
+extern fftwf_plan fftwf_plan_dft_c2r_2d(int n0, int n1, fftwf_complex *in, float *out, unsigned flags); 
+extern fftwf_plan fftwf_plan_dft_c2r_3d(int n0, int n1, int n2, fftwf_complex *in, float *out, unsigned flags); 
+extern fftwf_plan fftwf_plan_guru_dft_r2c(int rank, const fftwf_iodim *dims, int howmany_rank, const fftwf_iodim *howmany_dims, float *in, fftwf_complex *out, unsigned flags); 
+extern fftwf_plan fftwf_plan_guru_dft_c2r(int rank, const fftwf_iodim *dims, int howmany_rank, const fftwf_iodim *howmany_dims, fftwf_complex *in, float *out, unsigned flags); 
+extern fftwf_plan fftwf_plan_guru_split_dft_r2c( int rank, const fftwf_iodim *dims, int howmany_rank, const fftwf_iodim *howmany_dims, float *in, float *ro, float *io, unsigned flags); 
+extern fftwf_plan fftwf_plan_guru_split_dft_c2r( int rank, const fftwf_iodim *dims, int howmany_rank, const fftwf_iodim *howmany_dims, float *ri, float *ii, float *out, unsigned flags); 
+extern fftwf_plan fftwf_plan_guru64_dft_r2c(int rank, const fftwf_iodim64 *dims, int howmany_rank, const fftwf_iodim64 *howmany_dims, float *in, fftwf_complex *out, unsigned flags); 
+extern fftwf_plan fftwf_plan_guru64_dft_c2r(int rank, const fftwf_iodim64 *dims, int howmany_rank, const fftwf_iodim64 *howmany_dims, fftwf_complex *in, float *out, unsigned flags); 
+extern fftwf_plan fftwf_plan_guru64_split_dft_r2c( int rank, const fftwf_iodim64 *dims, int howmany_rank, const fftwf_iodim64 *howmany_dims, float *in, float *ro, float *io, unsigned flags); 
+extern fftwf_plan fftwf_plan_guru64_split_dft_c2r( int rank, const fftwf_iodim64 *dims, int howmany_rank, const fftwf_iodim64 *howmany_dims, float *ri, float *ii, float *out, unsigned flags); 
+extern void fftwf_execute_dft_r2c(const fftwf_plan p, float *in, fftwf_complex *out); 
+extern void fftwf_execute_dft_c2r(const fftwf_plan p, fftwf_complex *in, float *out); 
+extern void fftwf_execute_split_dft_r2c(const fftwf_plan p, float *in, float *ro, float *io); 
+extern void fftwf_execute_split_dft_c2r(const fftwf_plan p, float *ri, float *ii, float *out); 
+extern fftwf_plan fftwf_plan_many_r2r(int rank, const int *n, int howmany, float *in, const int *inembed, int istride, int idist, float *out, const int *onembed, int ostride, int odist, const fftwf_r2r_kind *kind, unsigned flags); 
+extern fftwf_plan fftwf_plan_r2r(int rank, const int *n, float *in, float *out, const fftwf_r2r_kind *kind, unsigned flags); 
+extern fftwf_plan fftwf_plan_r2r_1d(int n, float *in, float *out, fftwf_r2r_kind kind, unsigned flags); 
+extern fftwf_plan fftwf_plan_r2r_2d(int n0, int n1, float *in, float *out, fftwf_r2r_kind kind0, fftwf_r2r_kind kind1, unsigned flags); 
+extern fftwf_plan fftwf_plan_r2r_3d(int n0, int n1, int n2, float *in, float *out, fftwf_r2r_kind kind0, fftwf_r2r_kind kind1, fftwf_r2r_kind kind2, unsigned flags); 
+extern fftwf_plan fftwf_plan_guru_r2r(int rank, const fftwf_iodim *dims, int howmany_rank, const fftwf_iodim *howmany_dims, float *in, float *out, const fftwf_r2r_kind *kind, unsigned flags); 
+extern fftwf_plan fftwf_plan_guru64_r2r(int rank, const fftwf_iodim64 *dims, int howmany_rank, const fftwf_iodim64 *howmany_dims, float *in, float *out, const fftwf_r2r_kind *kind, unsigned flags); 
+extern void fftwf_execute_r2r(const fftwf_plan p, float *in, float *out); 
+extern void fftwf_destroy_plan(fftwf_plan p); 
+extern void fftwf_forget_wisdom(void); 
+extern void fftwf_cleanup(void); 
+extern void fftwf_set_timelimit(double t); 
+extern void fftwf_plan_with_nthreads(int nthreads); 
+extern int fftwf_init_threads(void); 
+extern void fftwf_cleanup_threads(void); 
+extern int fftwf_export_wisdom_to_filename(const char *filename); 
+extern void fftwf_export_wisdom_to_file(FILE *output_file); 
+extern char *fftwf_export_wisdom_to_string(void); 
+extern void fftwf_export_wisdom(fftwf_write_char_func write_char, void *data); 
+extern int fftwf_import_system_wisdom(void); 
+extern int fftwf_import_wisdom_from_filename(const char *filename); 
+extern int fftwf_import_wisdom_from_file(FILE *input_file); 
+extern int fftwf_import_wisdom_from_string(const char *input_string); 
+extern int fftwf_import_wisdom(fftwf_read_char_func read_char, void *data); 
+extern void fftwf_fprint_plan(const fftwf_plan p, FILE *output_file); 
+extern void fftwf_print_plan(const fftwf_plan p); 
+extern void *fftwf_malloc(size_t n); 
+extern float *fftwf_alloc_real(size_t n); 
+extern fftwf_complex *fftwf_alloc_complex(size_t n); 
+extern void fftwf_free(void *p); 
+extern void fftwf_flops(const fftwf_plan p, double *add, double *mul, double *fmas); 
+extern double fftwf_estimate_cost(const fftwf_plan p); 
+extern double fftwf_cost(const fftwf_plan p); 
+extern const char fftwf_version[]; 
+extern const char fftwf_cc[]; 
+extern const char fftwf_codelet_optim[];
+typedef long double fftwl_complex[2]; 
+typedef struct fftwl_plan_s *fftwl_plan; 
+typedef struct fftw_iodim_do_not_use_me fftwl_iodim; 
+typedef struct fftw_iodim64_do_not_use_me fftwl_iodim64; 
+typedef enum fftw_r2r_kind_do_not_use_me fftwl_r2r_kind; 
+typedef fftw_write_char_func_do_not_use_me fftwl_write_char_func; 
+typedef fftw_read_char_func_do_not_use_me fftwl_read_char_func; 
+extern void fftwl_execute(const fftwl_plan p); 
+extern fftwl_plan fftwl_plan_dft(int rank, const int *n, fftwl_complex *in, fftwl_complex *out, int sign, unsigned flags); 
+extern fftwl_plan fftwl_plan_dft_1d(int n, fftwl_complex *in, fftwl_complex *out, int sign, unsigned flags); 
+extern fftwl_plan fftwl_plan_dft_2d(int n0, int n1, fftwl_complex *in, fftwl_complex *out, int sign, unsigned flags); 
+extern fftwl_plan fftwl_plan_dft_3d(int n0, int n1, int n2, fftwl_complex *in, fftwl_complex *out, int sign, unsigned flags); 
+extern fftwl_plan fftwl_plan_many_dft(int rank, const int *n, int howmany, fftwl_complex *in, const int *inembed, int istride, int idist, fftwl_complex *out, const int *onembed, int ostride, int odist, int sign, unsigned flags); 
+extern fftwl_plan fftwl_plan_guru_dft(int rank, const fftwl_iodim *dims, int howmany_rank, const fftwl_iodim *howmany_dims, fftwl_complex *in, fftwl_complex *out, int sign, unsigned flags); 
+extern fftwl_plan fftwl_plan_guru_split_dft(int rank, const fftwl_iodim *dims, int howmany_rank, const fftwl_iodim *howmany_dims, long double *ri, long double *ii, long double *ro, long double *io, unsigned flags); 
+extern fftwl_plan fftwl_plan_guru64_dft(int rank, const fftwl_iodim64 *dims, int howmany_rank, const fftwl_iodim64 *howmany_dims, fftwl_complex *in, fftwl_complex *out, int sign, unsigned flags); 
+extern fftwl_plan fftwl_plan_guru64_split_dft(int rank, const fftwl_iodim64 *dims, int howmany_rank, const fftwl_iodim64 *howmany_dims, long double *ri, long double *ii, long double *ro, long double *io, unsigned flags); 
+extern void fftwl_execute_dft(const fftwl_plan p, fftwl_complex *in, fftwl_complex *out); 
+extern void fftwl_execute_split_dft(const fftwl_plan p, long double *ri, long double *ii, long double *ro, long double *io); 
+extern fftwl_plan fftwl_plan_many_dft_r2c(int rank, const int *n, int howmany, long double *in, const int *inembed, int istride, int idist, fftwl_complex *out, const int *onembed, int ostride, int odist, unsigned flags); 
+extern fftwl_plan fftwl_plan_dft_r2c(int rank, const int *n, long double *in, fftwl_complex *out, unsigned flags); 
+extern fftwl_plan fftwl_plan_dft_r2c_1d(int n,long double *in,fftwl_complex *out,unsigned flags); 
+extern fftwl_plan fftwl_plan_dft_r2c_2d(int n0, int n1, long double *in, fftwl_complex *out, unsigned flags); 
+extern fftwl_plan fftwl_plan_dft_r2c_3d(int n0, int n1, int n2, long double *in, fftwl_complex *out, unsigned flags); 
+extern fftwl_plan fftwl_plan_many_dft_c2r(int rank, const int *n, int howmany, fftwl_complex *in, const int *inembed, int istride, int idist, long double *out, const int *onembed, int ostride, int odist, unsigned flags); 
+extern fftwl_plan fftwl_plan_dft_c2r(int rank, const int *n, fftwl_complex *in, long double *out, unsigned flags); 
+extern fftwl_plan fftwl_plan_dft_c2r_1d(int n,fftwl_complex *in,long double *out,unsigned flags); 
+extern fftwl_plan fftwl_plan_dft_c2r_2d(int n0, int n1, fftwl_complex *in, long double *out, unsigned flags); 
+extern fftwl_plan fftwl_plan_dft_c2r_3d(int n0, int n1, int n2, fftwl_complex *in, long double *out, unsigned flags); 
+extern fftwl_plan fftwl_plan_guru_dft_r2c(int rank, const fftwl_iodim *dims, int howmany_rank, const fftwl_iodim *howmany_dims, long double *in, fftwl_complex *out, unsigned flags); 
+extern fftwl_plan fftwl_plan_guru_dft_c2r(int rank, const fftwl_iodim *dims, int howmany_rank, const fftwl_iodim *howmany_dims, fftwl_complex *in, long double *out, unsigned flags); 
+extern fftwl_plan fftwl_plan_guru_split_dft_r2c( int rank, const fftwl_iodim *dims, int howmany_rank, const fftwl_iodim *howmany_dims, long double *in, long double *ro, long double *io, unsigned flags); 
+extern fftwl_plan fftwl_plan_guru_split_dft_c2r( int rank, const fftwl_iodim *dims, int howmany_rank, const fftwl_iodim *howmany_dims, long double *ri, long double *ii, long double *out, unsigned flags); 
+extern fftwl_plan fftwl_plan_guru64_dft_r2c(int rank, const fftwl_iodim64 *dims, int howmany_rank, const fftwl_iodim64 *howmany_dims, long double *in, fftwl_complex *out, unsigned flags); 
+extern fftwl_plan fftwl_plan_guru64_dft_c2r(int rank, const fftwl_iodim64 *dims, int howmany_rank, const fftwl_iodim64 *howmany_dims, fftwl_complex *in, long double *out, unsigned flags); 
+extern fftwl_plan fftwl_plan_guru64_split_dft_r2c( int rank, const fftwl_iodim64 *dims, int howmany_rank, const fftwl_iodim64 *howmany_dims, long double *in, long double *ro, long double *io, unsigned flags); 
+extern fftwl_plan fftwl_plan_guru64_split_dft_c2r( int rank, const fftwl_iodim64 *dims, int howmany_rank, const fftwl_iodim64 *howmany_dims, long double *ri, long double *ii, long double *out, unsigned flags); 
+extern void fftwl_execute_dft_r2c(const fftwl_plan p, long double *in, fftwl_complex *out); 
+extern void fftwl_execute_dft_c2r(const fftwl_plan p, fftwl_complex *in, long double *out); 
+extern void fftwl_execute_split_dft_r2c(const fftwl_plan p, long double *in, long double *ro, long double *io); 
+extern void fftwl_execute_split_dft_c2r(const fftwl_plan p, long double *ri, long double *ii, long double *out); 
+extern fftwl_plan fftwl_plan_many_r2r(int rank, const int *n, int howmany, long double *in, const int *inembed, int istride, int idist, long double *out, const int *onembed, int ostride, int odist, const fftwl_r2r_kind *kind, unsigned flags); 
+extern fftwl_plan fftwl_plan_r2r(int rank, const int *n, long double *in, long double *out, const fftwl_r2r_kind *kind, unsigned flags); 
+extern fftwl_plan fftwl_plan_r2r_1d(int n, long double *in, long double *out, fftwl_r2r_kind kind, unsigned flags); 
+extern fftwl_plan fftwl_plan_r2r_2d(int n0, int n1, long double *in, long double *out, fftwl_r2r_kind kind0, fftwl_r2r_kind kind1, unsigned flags); 
+extern fftwl_plan fftwl_plan_r2r_3d(int n0, int n1, int n2, long double *in, long double *out, fftwl_r2r_kind kind0, fftwl_r2r_kind kind1, fftwl_r2r_kind kind2, unsigned flags); 
+extern fftwl_plan fftwl_plan_guru_r2r(int rank, const fftwl_iodim *dims, int howmany_rank, const fftwl_iodim *howmany_dims, long double *in, long double *out, const fftwl_r2r_kind *kind, unsigned flags); 
+extern fftwl_plan fftwl_plan_guru64_r2r(int rank, const fftwl_iodim64 *dims, int howmany_rank, const fftwl_iodim64 *howmany_dims, long double *in, long double *out, const fftwl_r2r_kind *kind, unsigned flags); 
+extern void fftwl_execute_r2r(const fftwl_plan p, long double *in, long double *out); 
+extern void fftwl_destroy_plan(fftwl_plan p); 
+extern void fftwl_forget_wisdom(void); 
+extern void fftwl_cleanup(void); 
+extern void fftwl_set_timelimit(double t); 
+extern void fftwl_plan_with_nthreads(int nthreads); 
+extern int fftwl_init_threads(void); 
+extern void fftwl_cleanup_threads(void); 
+extern int fftwl_export_wisdom_to_filename(const char *filename); 
+extern void fftwl_export_wisdom_to_file(FILE *output_file); 
+extern char *fftwl_export_wisdom_to_string(void); 
+extern void fftwl_export_wisdom(fftwl_write_char_func write_char, void *data); 
+extern int fftwl_import_system_wisdom(void); 
+extern int fftwl_import_wisdom_from_filename(const char *filename); 
+extern int fftwl_import_wisdom_from_file(FILE *input_file); 
+extern int fftwl_import_wisdom_from_string(const char *input_string); 
+extern int fftwl_import_wisdom(fftwl_read_char_func read_char, void *data); 
+extern void fftwl_fprint_plan(const fftwl_plan p, FILE *output_file); 
+extern void fftwl_print_plan(const fftwl_plan p); 
+extern void *fftwl_malloc(size_t n); 
+extern long double *fftwl_alloc_real(size_t n); 
+extern fftwl_complex *fftwl_alloc_complex(size_t n); 
+extern void fftwl_free(void *p); 
+extern void fftwl_flops(const fftwl_plan p, double *add, double *mul, double *fmas); 
+extern double fftwl_estimate_cost(const fftwl_plan p); 
+extern double fftwl_cost(const fftwl_plan p); 
+extern const char fftwl_version[]; 
+extern const char fftwl_cc[]; 
+extern const char fftwl_codelet_optim[];
+typedef __float128 fftwq_complex[2]; 
+typedef struct fftwq_plan_s *fftwq_plan; 
+typedef struct fftw_iodim_do_not_use_me fftwq_iodim; 
+typedef struct fftw_iodim64_do_not_use_me fftwq_iodim64; 
+typedef enum fftw_r2r_kind_do_not_use_me fftwq_r2r_kind; 
+typedef fftw_write_char_func_do_not_use_me fftwq_write_char_func; 
+typedef fftw_read_char_func_do_not_use_me fftwq_read_char_func; 
+extern void fftwq_execute(const fftwq_plan p); 
+extern fftwq_plan fftwq_plan_dft(int rank, const int *n, fftwq_complex *in, fftwq_complex *out, int sign, unsigned flags); 
+extern fftwq_plan fftwq_plan_dft_1d(int n, fftwq_complex *in, fftwq_complex *out, int sign, unsigned flags); 
+extern fftwq_plan fftwq_plan_dft_2d(int n0, int n1, fftwq_complex *in, fftwq_complex *out, int sign, unsigned flags); 
+extern fftwq_plan fftwq_plan_dft_3d(int n0, int n1, int n2, fftwq_complex *in, fftwq_complex *out, int sign, unsigned flags); 
+extern fftwq_plan fftwq_plan_many_dft(int rank, const int *n, int howmany, fftwq_complex *in, const int *inembed, int istride, int idist, fftwq_complex *out, const int *onembed, int ostride, int odist, int sign, unsigned flags); 
+extern fftwq_plan fftwq_plan_guru_dft(int rank, const fftwq_iodim *dims, int howmany_rank, const fftwq_iodim *howmany_dims, fftwq_complex *in, fftwq_complex *out, int sign, unsigned flags); 
+extern fftwq_plan fftwq_plan_guru_split_dft(int rank, const fftwq_iodim *dims, int howmany_rank, const fftwq_iodim *howmany_dims, __float128 *ri, __float128 *ii, __float128 *ro, __float128 *io, unsigned flags); 
+extern fftwq_plan fftwq_plan_guru64_dft(int rank, const fftwq_iodim64 *dims, int howmany_rank, const fftwq_iodim64 *howmany_dims, fftwq_complex *in, fftwq_complex *out, int sign, unsigned flags); 
+extern fftwq_plan fftwq_plan_guru64_split_dft(int rank, const fftwq_iodim64 *dims, int howmany_rank, const fftwq_iodim64 *howmany_dims, __float128 *ri, __float128 *ii, __float128 *ro, __float128 *io, unsigned flags); 
+extern void fftwq_execute_dft(const fftwq_plan p, fftwq_complex *in, fftwq_complex *out); 
+extern void fftwq_execute_split_dft(const fftwq_plan p, __float128 *ri, __float128 *ii, __float128 *ro, __float128 *io); 
+extern fftwq_plan fftwq_plan_many_dft_r2c(int rank, const int *n, int howmany, __float128 *in, const int *inembed, int istride, int idist, fftwq_complex *out, const int *onembed, int ostride, int odist, unsigned flags); 
+extern fftwq_plan fftwq_plan_dft_r2c(int rank, const int *n, __float128 *in, fftwq_complex *out, unsigned flags); 
+extern fftwq_plan fftwq_plan_dft_r2c_1d(int n,__float128 *in,fftwq_complex *out,unsigned flags); 
+extern fftwq_plan fftwq_plan_dft_r2c_2d(int n0, int n1, __float128 *in, fftwq_complex *out, unsigned flags); 
+extern fftwq_plan fftwq_plan_dft_r2c_3d(int n0, int n1, int n2, __float128 *in, fftwq_complex *out, unsigned flags); 
+extern fftwq_plan fftwq_plan_many_dft_c2r(int rank, const int *n, int howmany, fftwq_complex *in, const int *inembed, int istride, int idist, __float128 *out, const int *onembed, int ostride, int odist, unsigned flags); 
+extern fftwq_plan fftwq_plan_dft_c2r(int rank, const int *n, fftwq_complex *in, __float128 *out, unsigned flags); 
+extern fftwq_plan fftwq_plan_dft_c2r_1d(int n,fftwq_complex *in,__float128 *out,unsigned flags); 
+extern fftwq_plan fftwq_plan_dft_c2r_2d(int n0, int n1, fftwq_complex *in, __float128 *out, unsigned flags); 
+extern fftwq_plan fftwq_plan_dft_c2r_3d(int n0, int n1, int n2, fftwq_complex *in, __float128 *out, unsigned flags); 
+extern fftwq_plan fftwq_plan_guru_dft_r2c(int rank, const fftwq_iodim *dims, int howmany_rank, const fftwq_iodim *howmany_dims, __float128 *in, fftwq_complex *out, unsigned flags); 
+extern fftwq_plan fftwq_plan_guru_dft_c2r(int rank, const fftwq_iodim *dims, int howmany_rank, const fftwq_iodim *howmany_dims, fftwq_complex *in, __float128 *out, unsigned flags); 
+extern fftwq_plan fftwq_plan_guru_split_dft_r2c( int rank, const fftwq_iodim *dims, int howmany_rank, const fftwq_iodim *howmany_dims, __float128 *in, __float128 *ro, __float128 *io, unsigned flags); 
+extern fftwq_plan fftwq_plan_guru_split_dft_c2r( int rank, const fftwq_iodim *dims, int howmany_rank, const fftwq_iodim *howmany_dims, __float128 *ri, __float128 *ii, __float128 *out, unsigned flags); 
+extern fftwq_plan fftwq_plan_guru64_dft_r2c(int rank, const fftwq_iodim64 *dims, int howmany_rank, const fftwq_iodim64 *howmany_dims, __float128 *in, fftwq_complex *out, unsigned flags); 
+extern fftwq_plan fftwq_plan_guru64_dft_c2r(int rank, const fftwq_iodim64 *dims, int howmany_rank, const fftwq_iodim64 *howmany_dims, fftwq_complex *in, __float128 *out, unsigned flags); 
+extern fftwq_plan fftwq_plan_guru64_split_dft_r2c( int rank, const fftwq_iodim64 *dims, int howmany_rank, const fftwq_iodim64 *howmany_dims, __float128 *in, __float128 *ro, __float128 *io, unsigned flags); 
+extern fftwq_plan fftwq_plan_guru64_split_dft_c2r( int rank, const fftwq_iodim64 *dims, int howmany_rank, const fftwq_iodim64 *howmany_dims, __float128 *ri, __float128 *ii, __float128 *out, unsigned flags); 
+extern void fftwq_execute_dft_r2c(const fftwq_plan p, __float128 *in, fftwq_complex *out); 
+extern void fftwq_execute_dft_c2r(const fftwq_plan p, fftwq_complex *in, __float128 *out); 
+extern void fftwq_execute_split_dft_r2c(const fftwq_plan p, __float128 *in, __float128 *ro, __float128 *io); 
+extern void fftwq_execute_split_dft_c2r(const fftwq_plan p, __float128 *ri, __float128 *ii, __float128 *out); 
+extern fftwq_plan fftwq_plan_many_r2r(int rank, const int *n, int howmany, __float128 *in, const int *inembed, int istride, int idist, __float128 *out, const int *onembed, int ostride, int odist, const fftwq_r2r_kind *kind, unsigned flags); 
+extern fftwq_plan fftwq_plan_r2r(int rank, const int *n, __float128 *in, __float128 *out, const fftwq_r2r_kind *kind, unsigned flags); 
+extern fftwq_plan fftwq_plan_r2r_1d(int n, __float128 *in, __float128 *out, fftwq_r2r_kind kind, unsigned flags); 
+extern fftwq_plan fftwq_plan_r2r_2d(int n0, int n1, __float128 *in, __float128 *out, fftwq_r2r_kind kind0, fftwq_r2r_kind kind1, unsigned flags); 
+extern fftwq_plan fftwq_plan_r2r_3d(int n0, int n1, int n2, __float128 *in, __float128 *out, fftwq_r2r_kind kind0, fftwq_r2r_kind kind1, fftwq_r2r_kind kind2, unsigned flags); 
+extern fftwq_plan fftwq_plan_guru_r2r(int rank, const fftwq_iodim *dims, int howmany_rank, const fftwq_iodim *howmany_dims, __float128 *in, __float128 *out, const fftwq_r2r_kind *kind, unsigned flags); 
+extern fftwq_plan fftwq_plan_guru64_r2r(int rank, const fftwq_iodim64 *dims, int howmany_rank, const fftwq_iodim64 *howmany_dims, __float128 *in, __float128 *out, const fftwq_r2r_kind *kind, unsigned flags); 
+extern void fftwq_execute_r2r(const fftwq_plan p, __float128 *in, __float128 *out); 
+extern void fftwq_destroy_plan(fftwq_plan p); 
+extern void fftwq_forget_wisdom(void); 
+extern void fftwq_cleanup(void); 
+extern void fftwq_set_timelimit(double t); 
+extern void fftwq_plan_with_nthreads(int nthreads); 
+extern int fftwq_init_threads(void); 
+extern void fftwq_cleanup_threads(void); 
+extern int fftwq_export_wisdom_to_filename(const char *filename); 
+extern void fftwq_export_wisdom_to_file(FILE *output_file); 
+extern char *fftwq_export_wisdom_to_string(void); 
+extern void fftwq_export_wisdom(fftwq_write_char_func write_char, void *data); 
+extern int fftwq_import_system_wisdom(void); 
+extern int fftwq_import_wisdom_from_filename(const char *filename); 
+extern int fftwq_import_wisdom_from_file(FILE *input_file); 
+extern int fftwq_import_wisdom_from_string(const char *input_string); 
+extern int fftwq_import_wisdom(fftwq_read_char_func read_char, void *data); 
+extern void fftwq_fprint_plan(const fftwq_plan p, FILE *output_file); 
+extern void fftwq_print_plan(const fftwq_plan p); 
+extern void *fftwq_malloc(size_t n); 
+extern __float128 *fftwq_alloc_real(size_t n); 
+extern fftwq_complex *fftwq_alloc_complex(size_t n); 
+extern void fftwq_free(void *p); 
+extern void fftwq_flops(const fftwq_plan p, double *add, double *mul, double *fmas); 
+extern double fftwq_estimate_cost(const fftwq_plan p); 
+extern double fftwq_cost(const fftwq_plan p); 
+extern const char fftwq_version[]; 
+extern const char fftwq_cc[]; 
+extern const char fftwq_codelet_optim[];
+
+]]
diff --git a/fftw3/defines.lua b/fftw3/defines.lua
new file mode 100644
index 000000000..a252b2aee
--- /dev/null
+++ b/fftw3/defines.lua
@@ -0,0 +1,36 @@
+local ffi=require 'ffi'
+local defines = {}
+function defines.register_hashdefs(fftw, C)
+   fftw.MEASURE = 0
+   fftw.DESTROY_INPUT = 1
+   fftw.UNALIGNED = 2
+   fftw.CONSERVE_MEMORY = 4
+   fftw.EXHAUSTIVE = 8 -- /* NO_EXHAUSTIVE is default */
+   fftw.PRESERVE_INPUT = 16 --  /* cancels DESTROY_INPUT */
+   fftw.PATIENT =32 --  /* IMPATIENT is default */
+   fftw.ESTIMATE = 64
+   fftw.WISDOM_ONLY = 2097152 --  (1U << 21)
+
+   fftw.FORWARD  = -1
+   fftw.BACKWARD =  1
+
+   fftw.NO_TIMELIMIT = (-1.0)
+
+   -- -- typedef
+   fftw.r2r_kind = ffi.typeof('fftw_r2r_kind')
+   fftw.R2HC=0
+   fftw.HC2R=1
+   fftw.DHT=2
+
+   fftw.REDFT00=3
+   fftw.REDFT01=4
+   fftw.REDFT10=5
+   fftw.REDFT11=6
+
+   fftw.RODFT00=7
+   fftw.RODFT01=8
+   fftw.RODFT10=9
+   fftw.RODFT11=10
+end
+
+return defines
\ No newline at end of file
diff --git a/fftw3/init.lua b/fftw3/init.lua
new file mode 100644
index 000000000..5e5e5590d
--- /dev/null
+++ b/fftw3/init.lua
@@ -0,0 +1,249 @@
+-- Do not change this file manually
+-- Generated with dev/create-init.lua
+
+local ffi = require 'ffi'
+local C = ffi.load('fftw3')
+local fftw = {C=C}
+
+require 'fftw3.cdefs'
+
+local defines = require 'fftw3.defines'
+defines.register_hashdefs(fftw, C)
+
+local function register(luafuncname, funcname)
+   local symexists, msg = pcall(function()
+                              local sym = C[funcname]
+                           end)
+   if symexists then
+      fftw[luafuncname] = C[funcname]
+   end
+end
+
+register('execute', 'fftw_execute')
+register('plan_dft', 'fftw_plan_dft')
+register('plan_dft_1d', 'fftw_plan_dft_1d')
+register('plan_dft_2d', 'fftw_plan_dft_2d')
+register('plan_dft_3d', 'fftw_plan_dft_3d')
+register('plan_many_dft', 'fftw_plan_many_dft')
+register('plan_guru_dft', 'fftw_plan_guru_dft')
+register('plan_guru_split_dft', 'fftw_plan_guru_split_dft')
+register('plan_guru64_dft', 'fftw_plan_guru64_dft')
+register('plan_guru64_split_dft', 'fftw_plan_guru64_split_dft')
+register('execute_dft', 'fftw_execute_dft')
+register('execute_split_dft', 'fftw_execute_split_dft')
+register('plan_many_dft_r2c', 'fftw_plan_many_dft_r2c')
+register('plan_dft_r2c', 'fftw_plan_dft_r2c')
+register('plan_dft_r2c_1d', 'fftw_plan_dft_r2c_1d')
+register('plan_dft_r2c_2d', 'fftw_plan_dft_r2c_2d')
+register('plan_dft_r2c_3d', 'fftw_plan_dft_r2c_3d')
+register('plan_many_dft_c2r', 'fftw_plan_many_dft_c2r')
+register('plan_dft_c2r', 'fftw_plan_dft_c2r')
+register('plan_dft_c2r_1d', 'fftw_plan_dft_c2r_1d')
+register('plan_dft_c2r_2d', 'fftw_plan_dft_c2r_2d')
+register('plan_dft_c2r_3d', 'fftw_plan_dft_c2r_3d')
+register('plan_guru_dft_r2c', 'fftw_plan_guru_dft_r2c')
+register('plan_guru_dft_c2r', 'fftw_plan_guru_dft_c2r')
+register('plan_guru_split_dft_r2c', 'fftw_plan_guru_split_dft_r2c')
+register('plan_guru_split_dft_c2r', 'fftw_plan_guru_split_dft_c2r')
+register('plan_guru64_dft_r2c', 'fftw_plan_guru64_dft_r2c')
+register('plan_guru64_dft_c2r', 'fftw_plan_guru64_dft_c2r')
+register('plan_guru64_split_dft_r2c', 'fftw_plan_guru64_split_dft_r2c')
+register('plan_guru64_split_dft_c2r', 'fftw_plan_guru64_split_dft_c2r')
+register('execute_dft_r2c', 'fftw_execute_dft_r2c')
+register('execute_dft_c2r', 'fftw_execute_dft_c2r')
+register('execute_split_dft_r2c', 'fftw_execute_split_dft_r2c')
+register('execute_split_dft_c2r', 'fftw_execute_split_dft_c2r')
+register('plan_many_r2r', 'fftw_plan_many_r2r')
+register('plan_r2r', 'fftw_plan_r2r')
+register('plan_r2r_1d', 'fftw_plan_r2r_1d')
+register('plan_r2r_2d', 'fftw_plan_r2r_2d')
+register('plan_r2r_3d', 'fftw_plan_r2r_3d')
+register('plan_guru_r2r', 'fftw_plan_guru_r2r')
+register('plan_guru64_r2r', 'fftw_plan_guru64_r2r')
+register('execute_r2r', 'fftw_execute_r2r')
+register('destroy_plan', 'fftw_destroy_plan')
+register('forget_wisdom', 'fftw_forget_wisdom')
+register('cleanup', 'fftw_cleanup')
+register('set_timelimit', 'fftw_set_timelimit')
+register('plan_with_nthreads', 'fftw_plan_with_nthreads')
+register('init_threads', 'fftw_init_threads')
+register('cleanup_threads', 'fftw_cleanup_threads')
+register('export_wisdom_to_filename', 'fftw_export_wisdom_to_filename')
+register('export_wisdom_to_file', 'fftw_export_wisdom_to_file')
+register('export_wisdom_to_string', 'fftw_export_wisdom_to_string')
+register('export_wisdom', 'fftw_export_wisdom')
+register('import_system_wisdom', 'fftw_import_system_wisdom')
+register('import_wisdom_from_filename', 'fftw_import_wisdom_from_filename')
+register('import_wisdom_from_file', 'fftw_import_wisdom_from_file')
+register('import_wisdom_from_string', 'fftw_import_wisdom_from_string')
+register('import_wisdom', 'fftw_import_wisdom')
+register('fprint_plan', 'fftw_fprint_plan')
+register('print_plan', 'fftw_print_plan')
+register('malloc', 'fftw_malloc')
+register('alloc_real', 'fftw_alloc_real')
+register('alloc_complex', 'fftw_alloc_complex')
+register('free', 'fftw_free')
+register('flops', 'fftw_flops')
+register('estimate_cost', 'fftw_estimate_cost')
+register('cost', 'fftw_cost')
+
+register('r2r_kind_do_not_use_me', 'fftw_r2r_kind_do_not_use_me')
+register('iodim_do_not_use_me', 'fftw_iodim_do_not_use_me')
+register('iodim64_do_not_use_me', 'fftw_iodim64_do_not_use_me')
+register('write_char_func_do_not_use_me', 'fftw_write_char_func_do_not_use_me')
+register('read_char_func_do_not_use_me', 'fftw_read_char_func_do_not_use_me')
+register('complex', 'fftw_complex')
+register('plan_s', 'fftw_plan_s')
+register('plan', 'fftw_plan')
+register('iodim_do_not_use_me', 'fftw_iodim_do_not_use_me')
+register('iodim', 'fftw_iodim')
+register('iodim64_do_not_use_me', 'fftw_iodim64_do_not_use_me')
+register('iodim64', 'fftw_iodim64')
+register('r2r_kind_do_not_use_me', 'fftw_r2r_kind_do_not_use_me')
+register('r2r_kind', 'fftw_r2r_kind')
+register('write_char_func_do_not_use_me', 'fftw_write_char_func_do_not_use_me')
+register('write_char_func', 'fftw_write_char_func')
+register('read_char_func_do_not_use_me', 'fftw_read_char_func_do_not_use_me')
+register('read_char_func', 'fftw_read_char_func')
+register('plan', 'fftw_plan')
+register('plan', 'fftw_plan')
+register('complex', 'fftw_complex')
+register('complex', 'fftw_complex')
+register('plan', 'fftw_plan')
+register('complex', 'fftw_complex')
+register('complex', 'fftw_complex')
+register('plan', 'fftw_plan')
+register('complex', 'fftw_complex')
+register('complex', 'fftw_complex')
+register('plan', 'fftw_plan')
+register('complex', 'fftw_complex')
+register('complex', 'fftw_complex')
+register('plan', 'fftw_plan')
+register('complex', 'fftw_complex')
+register('complex', 'fftw_complex')
+register('plan', 'fftw_plan')
+register('iodim', 'fftw_iodim')
+register('iodim', 'fftw_iodim')
+register('complex', 'fftw_complex')
+register('complex', 'fftw_complex')
+register('plan', 'fftw_plan')
+register('iodim', 'fftw_iodim')
+register('iodim', 'fftw_iodim')
+register('plan', 'fftw_plan')
+register('iodim64', 'fftw_iodim64')
+register('iodim64', 'fftw_iodim64')
+register('complex', 'fftw_complex')
+register('complex', 'fftw_complex')
+register('plan', 'fftw_plan')
+register('iodim64', 'fftw_iodim64')
+register('iodim64', 'fftw_iodim64')
+register('plan', 'fftw_plan')
+register('complex', 'fftw_complex')
+register('complex', 'fftw_complex')
+register('plan', 'fftw_plan')
+register('plan', 'fftw_plan')
+register('complex', 'fftw_complex')
+register('plan', 'fftw_plan')
+register('complex', 'fftw_complex')
+register('plan', 'fftw_plan')
+register('complex', 'fftw_complex')
+register('plan', 'fftw_plan')
+register('complex', 'fftw_complex')
+register('plan', 'fftw_plan')
+register('complex', 'fftw_complex')
+register('plan', 'fftw_plan')
+register('complex', 'fftw_complex')
+register('plan', 'fftw_plan')
+register('complex', 'fftw_complex')
+register('plan', 'fftw_plan')
+register('complex', 'fftw_complex')
+register('plan', 'fftw_plan')
+register('complex', 'fftw_complex')
+register('plan', 'fftw_plan')
+register('complex', 'fftw_complex')
+register('plan', 'fftw_plan')
+register('iodim', 'fftw_iodim')
+register('iodim', 'fftw_iodim')
+register('complex', 'fftw_complex')
+register('plan', 'fftw_plan')
+register('iodim', 'fftw_iodim')
+register('iodim', 'fftw_iodim')
+register('complex', 'fftw_complex')
+register('plan', 'fftw_plan')
+register('iodim', 'fftw_iodim')
+register('iodim', 'fftw_iodim')
+register('plan', 'fftw_plan')
+register('iodim', 'fftw_iodim')
+register('iodim', 'fftw_iodim')
+register('plan', 'fftw_plan')
+register('iodim64', 'fftw_iodim64')
+register('iodim64', 'fftw_iodim64')
+register('complex', 'fftw_complex')
+register('plan', 'fftw_plan')
+register('iodim64', 'fftw_iodim64')
+register('iodim64', 'fftw_iodim64')
+register('complex', 'fftw_complex')
+register('plan', 'fftw_plan')
+register('iodim64', 'fftw_iodim64')
+register('iodim64', 'fftw_iodim64')
+register('plan', 'fftw_plan')
+register('iodim64', 'fftw_iodim64')
+register('iodim64', 'fftw_iodim64')
+register('plan', 'fftw_plan')
+register('complex', 'fftw_complex')
+register('plan', 'fftw_plan')
+register('complex', 'fftw_complex')
+register('plan', 'fftw_plan')
+register('plan', 'fftw_plan')
+register('plan', 'fftw_plan')
+register('r2r_kind', 'fftw_r2r_kind')
+register('plan', 'fftw_plan')
+register('r2r_kind', 'fftw_r2r_kind')
+register('plan', 'fftw_plan')
+register('r2r_kind', 'fftw_r2r_kind')
+register('plan', 'fftw_plan')
+register('r2r_kind', 'fftw_r2r_kind')
+register('r2r_kind', 'fftw_r2r_kind')
+register('plan', 'fftw_plan')
+register('r2r_kind', 'fftw_r2r_kind')
+register('r2r_kind', 'fftw_r2r_kind')
+register('r2r_kind', 'fftw_r2r_kind')
+register('plan', 'fftw_plan')
+register('iodim', 'fftw_iodim')
+register('iodim', 'fftw_iodim')
+register('r2r_kind', 'fftw_r2r_kind')
+register('plan', 'fftw_plan')
+register('iodim64', 'fftw_iodim64')
+register('iodim64', 'fftw_iodim64')
+register('r2r_kind', 'fftw_r2r_kind')
+register('plan', 'fftw_plan')
+register('plan', 'fftw_plan')
+register('write_char_func', 'fftw_write_char_func')
+register('read_char_func', 'fftw_read_char_func')
+register('plan', 'fftw_plan')
+register('plan', 'fftw_plan')
+register('complex', 'fftw_complex')
+register('plan', 'fftw_plan')
+register('plan', 'fftw_plan')
+register('plan', 'fftw_plan')
+register('version', 'fftw_version')
+register('cc', 'fftw_cc')
+register('codelet_optim', 'fftw_codelet_optim')
+register('iodim_do_not_use_me', 'fftw_iodim_do_not_use_me')
+register('iodim64_do_not_use_me', 'fftw_iodim64_do_not_use_me')
+register('r2r_kind_do_not_use_me', 'fftw_r2r_kind_do_not_use_me')
+register('write_char_func_do_not_use_me', 'fftw_write_char_func_do_not_use_me')
+register('read_char_func_do_not_use_me', 'fftw_read_char_func_do_not_use_me')
+register('iodim_do_not_use_me', 'fftw_iodim_do_not_use_me')
+register('iodim64_do_not_use_me', 'fftw_iodim64_do_not_use_me')
+register('r2r_kind_do_not_use_me', 'fftw_r2r_kind_do_not_use_me')
+register('write_char_func_do_not_use_me', 'fftw_write_char_func_do_not_use_me')
+register('read_char_func_do_not_use_me', 'fftw_read_char_func_do_not_use_me')
+register('iodim_do_not_use_me', 'fftw_iodim_do_not_use_me')
+register('iodim64_do_not_use_me', 'fftw_iodim64_do_not_use_me')
+register('r2r_kind_do_not_use_me', 'fftw_r2r_kind_do_not_use_me')
+register('write_char_func_do_not_use_me', 'fftw_write_char_func_do_not_use_me')
+register('read_char_func_do_not_use_me', 'fftw_read_char_func_do_not_use_me')
+
+return fftw
+
diff --git a/gslext.lua b/gslext.lua
index 957482dfd..98936fa5b 100644
--- a/gslext.lua
+++ b/gslext.lua
@@ -7,7 +7,7 @@ require('num')
 require('rng')
 require('rnd')
 require('integ-init')
-require('fft-init')
+require('fft')
 require('graph-init')
 require('randist')
 require('import')
diff --git a/tests/fft-tests.lua b/tests/fft-tests.lua
index b777797e3..161d6aeb7 100644
--- a/tests/fft-tests.lua
+++ b/tests/fft-tests.lua
@@ -1,172 +1,37 @@
-use 'math'
-use 'graph'
-use 'gsl'
+--This is a test for all the fft functions in the library
 
-local function test1()
-   local n, ncut = 8*3*5, 16
+local size = 10
+local size2 = 5
 
-   local sq = matrix.new(n, 1, |i| i < n/3 and 0 or (i < 2*n/3 and 1 or 0))
+local r1d = matrix.new(size, 1, |i,j|i)
+local c1d = matrix.cnew(size,1, |i,j|i)
 
-   local pt = plot('Original signal / reconstructed')
-   local pf = plot('FFT Power Spectrum')
+local r2d = matrix.new(size,size, |i,j|i)
+local c2d = matrix.cnew(size,size, |i,j|i)
 
-   pt:addline(filine(|i| sq[i], n), 'black')
+local r2dasym = matrix.new(size,5, |i,j|i)
+local c2dasym = matrix.new(size,5, |i,j|i)
 
-   local ft = fft(sq)
+local r3d = matrix.new(size2*size2*size2, 1, |i,j|i)
+local c3d = matrix.cnew(size2*size2*size2,1, |i,j|i)
 
-   pf:add(ibars(isample(|k| complex.abs(ft:get(k)), 0, n/2)), 'black')
+-- 1D test
 
-   for k=ncut, n - ncut do ft:set(k,0) end
-   sqt = fftinv(ft)
+print(num.fftinv(num.fft(c1d))/size)
+print('---')
+print(num.rfftinv(num.rfft(r1d))/size)
+print('---')
 
-   pt:addline(filine(|i| sqt[i], n), 'red')
+-- 2D tests
 
-   pf:show()
-   pt:show()
+print(num.fft2inv(num.fft2(c2d))/(size*size))
+print('---')
+print(num.rfft2inv(num.rfft2(r2d))/(size*size))
+print('---')
 
-   return pt, pf
-end
+-- 3D tests
 
-local function test1_radix2()
-   local n, ncut = 256, 16
-
-   local sq = matrix.new(n, 1, |i| i < n/3 and 0 or (i < 2*n/3 and 1 or 0))
-
-   local pt = plot('Original signal / reconstructed')
-   local pf = plot('FFT Power Spectrum')
-
-   pt:addline(filine(|i| sq[i], n), 'black')
-
-   local ft = fft(sq)
-
-   pf:add(ibars(isample(|k| complex.abs(ft:get(k)), 0, n/2)), 'black')
-
-   for k=ncut, n - ncut do ft:set(k,0) end
-   sqt = fftinv(ft)
-
-   pt:addline(filine(|i| sqt[i], n), 'red')
-
-   pf:show()
-   pt:show()
-
-   return pt, pf
-end
-
-local function test1_ip_radix2()
-   local hcget, hcset = gsl.halfcomplex_radix2_get, gsl.halfcomplex_radix2_set
-
-   local n, ncut = 256, 16
-
-   local sq = matrix.new(n, 1, |i| i < n/3 and 0 or (i < 2*n/3 and 1 or 0))
-
-   local pt = plot('Original signal / reconstructed')
-   local pf = plot('FFT Power Spectrum')
-
-   pt:addline(filine(|i| sq[i], n), 'black')
-
-   fft_radix2(sq)
-
-   pf:add(ibars(isample(|k| complex.abs(hcget(sq, k)), 0, n/2)), 'black')
-
-   for k=ncut, n - ncut do hcset(sq, k, 0) end
-   fft_radix2_inverse(sq)
-
-   pt:addline(filine(|i| sq[i], n), 'red')
-
-   pf:show()
-   pt:show()
-
-   return pt, pf
-end
-
-local function test1_ip()
-   local hcget, hcset = gsl.halfcomplex_get, gsl.halfcomplex_set
-
-   local n, ncut = 8*3*5, 16
-
-   local sq = matrix.new(n, 1, |i| i < n/3 and 0 or (i < 2*n/3 and 1 or 0))
-
-   local pt = plot('Original signal / reconstructed')
-   local pf = plot('FFT Power Spectrum')
-
-   pt:addline(filine(|i| sq[i], n), 'black')
-
-   fft_real(sq)
-
-   pf:add(ibars(isample(|k| complex.abs(hcget(sq, k)), 0, n/2)), 'black')
-
-   for k=ncut, n - ncut do hcset(sq, k, 0) end
-   fft_halfcomplex_inverse(sq)
-
-   pt:addline(filine(|i| sq[i], n), 'red')
-
-   pf:show()
-   pt:show()
-
-   return pt, pf
-end
-
-local function test2()
-   local n, ncut, order = 512, 11, 8
-   local x1 = besselJ_zero(order, 14)
-   local xsmp = |k| x1*(k-1)/(n-1)
-
-   local bess = matrix.new(n, 1, |i| besselJ(order, xsmp(i)))
-
-   local p = plot('Original signal / reconstructed')
-   p:addline(filine(|i| bess[i], n), 'black')
-
-   local ft = fft(bess)
-
-   fftplot = plot('FFT power spectrum')
-   bars = ibars(isample(|k| complex.abs(ft:get(k)), 0, 60))
-   fftplot:add(bars, 'darkgreen')
-   fftplot:addline(bars, 'black')
-   fftplot:show()
-
-   for k=ncut, n/2 do ft:set(k,0) end
-   local bessr = fftinv(ft)
-
-   p:addline(filine(|i| bessr[i], n), 'red', {{'dash', 7, 3}})
-   p:show()
-
-   return p, fftplot
-end
-
-
-local function test1_stride()
-   local n, ncut, nb = 256, 16, 3
-
-   local function squaref(i, n1, n2)
-      return i < n1 and 0 or (i < n2 and 1 or 0)
-   end
-
-   local sq = matrix.new(n, nb, |i, j| squaref(i, n/(3*j), 2*n/(3*j)))
-
-   local w = window('v' .. string.rep('.', nb))
-   local ftt = {}
-   for j=1, nb do
-      local ft = fft(sq:col(j))
-      local pf = plot()
-      pf:add(ibars(isample(|k| complex.abs(ft:get(k)), 0, n/2)), 'black')
-      w:attach(pf, j)
-      ftt[j] = ft
-   end
-
-   local w = window('v' .. string.rep('.', nb))
-   for j, ft in ipairs(ftt) do
-      local pt = plot('Original signal / reconstructed')
-      for k=ncut, n - ncut do ft:set(k,0) end
-      local sqt = fftinv(ft)
-      pt:addline(filine(|i| sq:get(i,j), n), 'black')
-      pt:addline(filine(|i| sqt[i], n))
-      w:attach(pt, j)
-   end
-end
-
-return {test1= test1, 
-	test2= test1_radix2, 
-	test3= test1_ip_radix2, 
-	test4= test1_ip, 
-	test5= test2, 
-	test6= test1_stride}
+print('---')
+print(num.fftninv(num.fftn(c3d, {size2, size2, size2}), {size2, size2, size2})/(size2*size2*size2))
+print('---')
+print(num.rfftninv(num.rfftn(r3d, {size2, size2, size2}), {size2, size2, size2})/(size2*size2*size2))
\ No newline at end of file