diff --git a/presentations/pasc/Makefile b/presentations/pasc/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..575514e294ff02d83fbf01015b7fde56b0302d1d
--- /dev/null
+++ b/presentations/pasc/Makefile
@@ -0,0 +1,24 @@
+DATADIR      = ./
+FILTERDIR    = $(DATADIR)/filters
+RESOURCEDIR  = $(DATADIR)/resources
+
+PDFOPTIONS = -t beamer
+PDFOPTIONS += --highlight-style my_highlight.theme
+PDFOPTIONS += --pdf-engine pdflatex
+PDFOPTIONS += --template=./default.latex
+PDFOPTIONS += -V theme:metropolis
+PDFOPTIONS += -V themeoptions:numbering=none -V themeoptions:progressbar=foot
+PDFOPTIONS += -V fontsize=smaller
+# PDFOPTIONS += --filter  pandoc-beamer-block
+# PDFOPTIONS += --lua-filter=${FILTERDIR}/tex.lua
+# PDFOPTIONS += --include-in-header=${RESOURCEDIR}/definitions.tex
+# PDFOPTIONS += --include-in-header=${RESOURCEDIR}/beamer.tex
+PDFOPTIONS += $(OPTIONS)
+
+all: pres.pdf
+
+pres.pdf: pres.md metadata.yaml
+	pandoc $(PDFOPTIONS) -o $@ $^
+
+clean:
+	rm -f *.pdf
diff --git a/presentations/pasc/code/.gitignore b/presentations/pasc/code/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..fc0cf783b825875336cec320dfadbdb0fc051c40
--- /dev/null
+++ b/presentations/pasc/code/.gitignore
@@ -0,0 +1,5 @@
+*.o
+lbm
+liblbm.h
+liblbm
+liblbm.c
diff --git a/presentations/pasc/code/Makefile b/presentations/pasc/code/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..cc6e33264955f87c888fefc6cc0938a5b46a51a5
--- /dev/null
+++ b/presentations/pasc/code/Makefile
@@ -0,0 +1,75 @@
+TARGET=c
+
+ifeq ($(TARGET),cuda)
+	CUDALIBS=-lcudart -lcuda -lnvrtc
+else ifeq ($(TARGET), opencl) 
+	OPENCLIBS=-lOpenCL
+endif
+
+CC=gcc
+FC=futhark
+OPTS=-Wall -Wextra -g -std=gnu11
+SANITIZERS=-fsanitize=address
+OPTIM=-O3
+LIBS=-lm $(SDLLIBS) $(CUDALIBS) $(OPENCLIBS) $(SANITIZERS)
+
+all: lbm3d
+
+raider_lbm3d: lbm3d.o raider_lbm.o scalar_field.o tensor_field.o scalar_field_2d.o tensor_field_2d.o lbm_utils.o vtk.o point.o box.o units.o utils.o
+	$(CC) $(OPTS) $(OPTIM) -o $@ $^ $(LIBS)
+
+
+lbm3d: lbm3d.o liblbm.o scalar_field.o tensor_field.o scalar_field_2d.o tensor_field_2d.o lbm_utils.o vtk.o point.o box.o units.o utils.o
+	$(CC) $(OPTS) $(OPTIM) -o $@ $^ $(LIBS)
+
+lbm3d.o: lbm3d.c liblbm.c liblbm.h c_interface/*.h
+	$(CC) $(OPTIM) $(OPTS) $(SANITIZERS) -c $<
+
+liblbm.o: liblbm.c liblbm.h
+	$(CC) $(OPTIM) $(OPTS) -c $<
+
+liblbm.c: liblbm.fut
+	$(FC) $(TARGET) --library $<
+
+raider_lbm.o: ../presentation/raider_lbm.c ../presentation/raider_lbm.h
+	$(CC) $(OPTIM) $(OPTS) -c $<
+
+../presentation/raider_lbm.c: ../presentation/raider_lbm.fut
+	$(FC) $(TARGET) --library $<
+
+tensor_field.o: c_interface/tensor_field.c c_interface/tensor_field.h c_interface/scalar_field.h
+	$(CC) $(OPTIM) $(OPTS) $(SANITIZERS) -c $<
+
+scalar_field.o: c_interface/scalar_field.c c_interface/scalar_field.h
+	$(CC) $(OPTIM) $(OPTS) $(SANITIZERS) -c $<
+
+tensor_field_2d.o: c_interface/tensor_field_2d.c c_interface/tensor_field_2d.h c_interface/scalar_field_2d.h
+	$(CC) $(OPTIM) $(OPTS) $(SANITIZERS) -c $<
+
+scalar_field_2d.o: c_interface/scalar_field_2d.c c_interface/scalar_field_2d.h
+	$(CC) $(OPTIM) $(OPTS) $(SANITIZERS) -c $<
+
+lbm_utils.o: c_interface/lbm_utils.c c_interface/lbm_utils.h c_interface/globals.h c_interface/scalar_field.h c_interface/tensor_field.h
+	$(CC) $(OPTIM) $(OPTS) $(SANITIZERS) -c $<
+
+vtk.o: c_interface/vtk.c c_interface/vtk.h c_interface/box.h c_interface/point.h c_interface/scalar_field.h c_interface/tensor_field.h c_interface/globals.h
+	$(CC) $(OPTIM) $(OPTS) $(SANITIZERS) -c $<
+
+point.o: c_interface/point.c c_interface/point.h c_interface/globals.h
+	$(CC) $(OPTIM) $(OPTS) $(SANITIZERS) -c $<
+
+box.o: c_interface/box.c c_interface/box.h c_interface/globals.h
+	$(CC) $(OPTIM) $(OPTS) $(SANITIZERS) -c $<
+
+utils.o: c_interface/utils.c c_interface/utils.h c_interface/globals.h
+	$(CC) $(OPTIM) $(OPTS) $(SANITIZERS) -c $<
+
+units.o: c_interface/units.c c_interface/units.h c_interface/utils.h c_interface/globals.h
+	$(CC) $(OPTIM) $(OPTS) $(SANITIZERS) -c $<
+
+
+clean:
+	rm -f liblbm.h liblbm.c liblbm.o liblbm
+	rm -f *.o
+	rm -f lbm3d
+
diff --git a/presentations/pasc/code/c_interface/box.c b/presentations/pasc/code/c_interface/box.c
new file mode 100644
index 0000000000000000000000000000000000000000..81e98b57f17e0e205ede6077a2d142f8daab6afd
--- /dev/null
+++ b/presentations/pasc/code/c_interface/box.c
@@ -0,0 +1,12 @@
+#include "box.h"
+
+box box_create(fint x0, fint x1, fint y0, fint y1, fint z0, fint z1) {
+    box b;
+    b.x0 = x0;
+    b.x1 = x1;
+    b.y0 = y0;
+    b.y1 = y1;
+    b.z0 = z0;
+    b.z1 = z1;
+    return b;
+}
\ No newline at end of file
diff --git a/presentations/pasc/code/c_interface/box.h b/presentations/pasc/code/c_interface/box.h
new file mode 100644
index 0000000000000000000000000000000000000000..7ad18b1d9f83fb829a801c4862791cf7c515f285
--- /dev/null
+++ b/presentations/pasc/code/c_interface/box.h
@@ -0,0 +1,12 @@
+#ifndef BOX_H
+#define BOX_H
+
+#include "globals.h"
+
+typedef struct {
+    fint x0, x1, y0, y1, z0, z1;
+} box;
+
+box box_create(fint x0, fint x1, fint y0, fint y1, fint z0, fint z1);
+
+#endif
diff --git a/presentations/pasc/code/c_interface/globals.h b/presentations/pasc/code/c_interface/globals.h
new file mode 100644
index 0000000000000000000000000000000000000000..1c8ea6c4c1fdfdf9cc7f62b97afa40df46733d98
--- /dev/null
+++ b/presentations/pasc/code/c_interface/globals.h
@@ -0,0 +1,18 @@
+#ifndef GLOBALS_H
+#define GLOBALS_H
+
+#include <stdint.h>
+#include <math.h>
+
+typedef int32_t fint;
+typedef uint32_t ufint;
+
+#ifdef DBL
+typedef double T;
+#else
+typedef float T;
+#endif
+
+static const T pi = (T)4*atan((T)1);
+
+#endif
\ No newline at end of file
diff --git a/presentations/pasc/code/c_interface/includes.h b/presentations/pasc/code/c_interface/includes.h
new file mode 100644
index 0000000000000000000000000000000000000000..b8dde1935014e31d48c68c96173c4988964cca95
--- /dev/null
+++ b/presentations/pasc/code/c_interface/includes.h
@@ -0,0 +1,16 @@
+#ifndef INCLUDES_H
+#define INCLUDES_H
+
+#include "globals.h"
+#include "scalar_field_2d.h"
+#include "scalar_field.h"
+#include "tensor_field_2d.h"
+#include "tensor_field.h"
+#include "lbm_utils.h"
+#include "point.h"
+#include "box.h"
+#include "vtk.h"
+#include "units.h"
+#include "utils.h"
+
+#endif
\ No newline at end of file
diff --git a/presentations/pasc/code/c_interface/lbm_utils.c b/presentations/pasc/code/c_interface/lbm_utils.c
new file mode 100644
index 0000000000000000000000000000000000000000..3cc3966ec94adaa1e509b636b2220743799c7291
--- /dev/null
+++ b/presentations/pasc/code/c_interface/lbm_utils.c
@@ -0,0 +1,157 @@
+#include "lbm_utils.h"
+#include <math.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <time.h>
+
+static T compute_feq(T w, const int c[], T rho, T u[]) {
+    T c_u = c[0] * u[0];
+    T u_sqr = u[0] * u[0];
+    for (int id = 1; id < D3; ++id) {
+        c_u   += c[id] * u[id];
+        u_sqr += u[id] * u[id];
+    }
+    return w * rho * (1.0 + 3.0 * c_u + 4.5 * c_u * c_u - 1.5 * u_sqr);
+}
+
+void lbm_allocate_and_ini_at_equilibrium_inplace(T rho, T u[], int nx, int ny, int nz, int q, tensor_field field) {
+    assert(nx == field.nx && "Inconsistent nx size.");
+    assert(ny == field.ny && "Inconsistent ny size.");
+    assert(nz == field.nz && "Inconsistent nz size.");
+
+    for (int ix = 0; ix < nx; ++ix) {
+        for (int iy = 0; iy < ny; ++iy) {
+            for (int iz = 0; iz < nz; ++iz) {
+                for (int ipop = 0; ipop < q; ++ipop) {
+                    field.grid[ix][iy][iz][ipop] = compute_feq(w[ipop], c_t[ipop], rho, u);
+                }
+            }
+        }
+    }
+}
+
+void lbm_allocate_and_ini_at_equilibrium_kida_inplace(T rho0, T u_max, int nx, int ny, int nz, int q, tensor_field field) {
+    assert(nx == field.nx && "Inconsistent nx size.");
+    assert(ny == field.ny && "Inconsistent ny size.");
+    assert(nz == field.nz && "Inconsistent nz size.");
+
+    srand(time(NULL));
+
+    for (int ix = 0; ix < nx; ++ix) {
+        for (int iy = 0; iy < ny; ++iy) {
+            for (int iz = 0; iz < nz; ++iz) {
+                T u[3];
+                    
+                T x = (T)2*pi*(T)ix / (T)(nx);
+                T y = (T)2*pi*(T)iy / (T)(ny);
+                T z = (T)2*pi*(T)iz / (T)(nz);
+                
+                u[0] = sin(x)*(cos((T)3*y) * cos(z) - cos(y)*cos((T)3*z));
+                u[1] = sin(y)*(cos((T)3*z) * cos(x) - cos(z)*cos((T)3*x));
+                u[2] = sin(z)*(cos((T)3*x) * cos(y) - cos(x)*cos((T)3*y));
+
+                for (fint id = 0; id < 3; ++id) {
+                    T rand_u = ((T)rand())/RAND_MAX;
+
+                    u[id] += 0.01 * rand_u;
+                    u[id] *= u_max;
+                }
+
+                T rho = -(1.0/6.0)*cos(4*z)*cos(2*x)*cos(2*y)+(1.0/36.0)*cos(4*z)*cos(4*x)*cos(2*y)+(1.0/36.0)*cos(4*z)*cos(2*x)*cos(4*y);
+                rho += +cos(2*z)*cos(2*x)*cos(2*y)-(1.0/6.0)*cos(2*z)*cos(4*x)*cos(2*y)-(1.0/6.0)*cos(2*z)*cos(2*x)*cos(4*y);
+                rho += +(1.0/36.0)*cos(4*x)*cos(4*y)*cos(2*z)-(1.0/80.0)*cos(6*y)*cos(2*z)-(1.0/10.0)*cos(4*z)*cos(2*y)+(1.0/8.0)*cos(4*z)*cos(4*y);
+                rho += -(1.0/10.0)*cos(4*z)*cos(2*x)+(1.0/8.0)*cos(4*z)*cos(4*x)+(1.0/8.0)*cos(2*z)*cos(2*y)-(1.0/10.0)*cos(2*z)*cos(4*y);
+                rho += +(1.0/8.0)*cos(2*z)*cos(2*x)-(1.0/10.0)*cos(2*z)*cos(4*x)-(1.0/80.0)*cos(6*x)*cos(2*z)-(1.0/4.0)*cos(2*x);
+                rho += -(1.0/80.0)*cos(6*z)*cos(2*x)-(1.0/80.0)*cos(6*z)*cos(2*y)-(1.0/80.0)*cos(6*x)*cos(2*y)-(1.0/10.0)*cos(2*x)*cos(4*y);
+                rho += +(1.0/8.0)*cos(2*x)*cos(2*y)+(1.0/8.0)*cos(4*x)*cos(4*y)-(1.0/80.0)*cos(2*x)*cos(6*y)-(1.0/10.0)*cos(4*x)*cos(2*y);
+                rho += -(1.0/4.0)*cos(2*z)-(1.0/4.0)*cos(2*y);
+
+                rho *= -u_max * u_max;
+                rho *= (T)3.0;
+                rho += rho0;
+
+                for (int ipop = 0; ipop < q; ++ipop) {
+                    field.grid[ix][iy][iz][ipop] = compute_feq(w[ipop], c_t[ipop], rho, u);
+                }
+            }
+        }
+    }
+}
+
+struct futhark_f32_4d *lbm_collide_and_stream(struct futhark_context *ctx,  struct futhark_f32_4d *f, T omega, fint iter) {
+    struct futhark_f32_4d *fout;
+    futhark_entry_main(ctx, &fout, f, omega, iter);
+    futhark_free_f32_4d(ctx, f);
+    
+    return fout;
+}
+
+void lbm_compute_density_inplace(tensor_field lattice, scalar_field density) {
+    assert(tensor_field_consistant_size_scalar_field(lattice, density) && "Sizes of rho and lattice are inconsistent.");
+    
+    for (int ix = 0; ix < lattice.nx; ++ix) {
+        for (int iy = 0; iy < lattice.ny; ++iy) {
+            for (int iz = 0; iz < lattice.nz; ++iz) {
+                T rho = 0.0;
+                for (int ipop = 0; ipop < lattice.q; ++ipop) {
+                    rho += lattice.grid[ix][iy][iz][ipop];
+                }
+                density.grid[ix][iy][iz] = rho;
+            }
+        }
+    }
+}
+
+void lbm_compute_velocity_inplace(tensor_field lattice, tensor_field velocity) {
+    assert(tensor_field_consistant_spatial_size_tensor_field(lattice, velocity) && "Sizes of velocity and lattice are inconsistent.");
+
+    for (int ix = 0; ix < lattice.nx; ++ix) {
+        for (int iy = 0; iy < lattice.ny; ++iy) {
+            for (int iz = 0; iz < lattice.nz; ++iz) {
+                T rho = 0.0;
+                for (int ipop = 0; ipop < lattice.q; ++ipop) {
+                    rho += lattice.grid[ix][iy][iz][ipop];
+                }
+                for (int id = 0; id < lattice.dim; ++id) {
+                    velocity.grid[ix][iy][iz][id] = (T)0;
+                    for (int ipop = 0; ipop < lattice.q; ++ipop) {
+                        velocity.grid[ix][iy][iz][id] += lattice.grid[ix][iy][iz][ipop] * c[id][ipop];
+                    }
+                    velocity.grid[ix][iy][iz][id] /= rho;
+                }
+            }
+        }
+    }
+}
+
+tensor_field lbm_allocate_and_ini_at_equilibrium(T rho, T u[], int nx, int ny, int nz, int q) {
+    tensor_field lattice = tensor_field_allocate(nx, ny, nz, q);
+
+    lbm_allocate_and_ini_at_equilibrium_inplace(rho, u, nx, ny, nz, q, lattice);
+
+    return lattice;
+}
+
+tensor_field lbm_allocate_and_ini_at_equilibrium_kida(T rho0, T u_max, int nx, int ny, int nz, int q) {
+    tensor_field lattice = tensor_field_allocate(nx, ny, nz, q);
+
+    lbm_allocate_and_ini_at_equilibrium_kida_inplace(rho0, u_max, nx, ny, nz, q, lattice);
+    
+    return lattice;
+}
+
+scalar_field lbm_compute_density(tensor_field lattice) {
+    scalar_field density = scalar_field_allocate(lattice.nx, lattice.ny, lattice.nz);
+
+    lbm_compute_density_inplace(lattice, density);
+
+    return density; 
+}
+
+tensor_field lbm_compute_velocity(tensor_field lattice) {
+    tensor_field vel = tensor_field_allocate(lattice.nx, lattice.ny, lattice.nz, lattice.dim);
+
+    lbm_compute_velocity_inplace(lattice, vel);
+
+    return vel;   
+}
diff --git a/presentations/pasc/code/c_interface/lbm_utils.h b/presentations/pasc/code/c_interface/lbm_utils.h
new file mode 100644
index 0000000000000000000000000000000000000000..4a6487dcd5ba4a9f793d6c4a82ce726ebadc082f
--- /dev/null
+++ b/presentations/pasc/code/c_interface/lbm_utils.h
@@ -0,0 +1,63 @@
+#ifndef LBM_UTILS_H
+#define LBM_UTILS_H
+
+#include "../liblbm.h"
+#include "globals.h"
+#include "scalar_field.h"
+#include "tensor_field.h"
+
+#define Q27 27
+#define D3  3
+
+static const T cs2 = 1.0 / 3.0;
+
+static const T w[Q27] = {
+                8.0/27,
+                2.0/27,  2.0/27,  2.0/27, 
+                1.0/54,  1.0/54,  1.0/54,
+                1.0/54,  1.0/54,  1.0/54,
+                1.0/216, 1.0/216, 1.0/216, 1.0/216,
+                2.0/27,  2.0/27,  2.0/27, 
+                1.0/54,  1.0/54,  1.0/54,
+                1.0/54,  1.0/54,  1.0/54,
+                1.0/216, 1.0/216, 1.0/216, 1.0/216
+            };
+
+static const fint c_t[Q27][D3] = {
+                { 0, 0, 0},
+
+                {-1, 0, 0}, { 0,-1, 0}, { 0, 0,-1},
+                {-1,-1, 0}, {-1, 1, 0}, {-1, 0,-1},
+                {-1, 0, 1}, { 0,-1,-1}, { 0,-1, 1},
+                {-1,-1,-1}, {-1,-1, 1}, {-1, 1,-1}, {-1, 1, 1},
+
+                { 1, 0, 0}, { 0, 1, 0}, { 0, 0, 1},
+                { 1, 1, 0}, { 1,-1, 0}, { 1, 0, 1},
+                { 1, 0,-1}, { 0, 1, 1}, { 0, 1,-1},
+                { 1, 1, 1}, { 1, 1,-1}, { 1,-1, 1}, { 1,-1,-1} };
+
+static const fint c[D3][Q27] = {
+    { 0, -1, 0, 0, -1, -1, -1, -1, 0, 0, -1, -1, -1, -1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1 },
+    { 0, 0, -1, 0, -1, 1, 0, 0, -1, -1, -1, -1, 1, 1, 0, 1, 0, 1, -1, 0, 0, 1, 1, 1, 1, -1, -1 }, 
+    { 0, 0, 0, -1, 0, 0, -1, 1, -1, 1, -1, 1, -1, 1, 0, 0, 1, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 },
+};
+
+void lbm_allocate_and_ini_at_equilibrium_inplace(T rho, T u[], int nx, int ny, int nz, int q, tensor_field field);
+
+void lbm_allocate_and_ini_at_equilibrium_kida_inplace(T rho0, T u_max, int nx, int ny, int nz, int q, tensor_field field);
+
+struct futhark_f32_4d *lbm_collide_and_stream(struct futhark_context *ctx,  struct futhark_f32_4d *f, T omega, fint iter);
+
+void lbm_compute_density_inplace(tensor_field lattice, scalar_field rho);
+
+void lbm_compute_velocity_inplace(tensor_field lattice, tensor_field velocity);
+
+tensor_field lbm_allocate_and_ini_at_equilibrium(T rho, T u[], int nx, int ny, int nz, int q);
+
+tensor_field lbm_allocate_and_ini_at_equilibrium_kida(T rho0, T u_max, int nx, int ny, int nz, int q);
+
+scalar_field lbm_compute_density(tensor_field lattice);
+
+tensor_field lbm_compute_velocity(tensor_field lattice);
+
+#endif
\ No newline at end of file
diff --git a/presentations/pasc/code/c_interface/point.c b/presentations/pasc/code/c_interface/point.c
new file mode 100644
index 0000000000000000000000000000000000000000..902d69ccefea37cfe4facbaf00728b9fe2853cb7
--- /dev/null
+++ b/presentations/pasc/code/c_interface/point.c
@@ -0,0 +1,14 @@
+#include "point.h"
+
+point point_create(T x, T y, T z) {
+    point p;
+    p.x = x;
+    p.y = y;
+    p.z = z;
+
+    return p;
+}
+
+point point_create_default() {
+    return point_create((T)0, (T)0, (T)0);
+}
\ No newline at end of file
diff --git a/presentations/pasc/code/c_interface/point.h b/presentations/pasc/code/c_interface/point.h
new file mode 100644
index 0000000000000000000000000000000000000000..df80156c7e23988916ec9a9a6d8c094d237d8fb0
--- /dev/null
+++ b/presentations/pasc/code/c_interface/point.h
@@ -0,0 +1,13 @@
+#ifndef POINT_H
+#define POINT_H
+
+#include "globals.h"
+
+typedef struct {
+    T x, y, z;
+} point;
+
+point point_create(T x, T y, T z);
+point point_create_default();
+
+#endif
diff --git a/presentations/pasc/code/c_interface/scalar_field.c b/presentations/pasc/code/c_interface/scalar_field.c
new file mode 100644
index 0000000000000000000000000000000000000000..339bbfaf16d68e818fb6c29f78a4dae4db16669d
--- /dev/null
+++ b/presentations/pasc/code/c_interface/scalar_field.c
@@ -0,0 +1,134 @@
+#include "scalar_field.h"
+#include "scalar_field_2d.h"
+#include <stdlib.h>
+#include <string.h>
+#include <float.h>
+#include <stdio.h>
+#include <assert.h>
+
+bool scalar_field_consistant_size_scalar_field(scalar_field rhs, scalar_field lhs) {
+    return (rhs.nx == lhs.nx && rhs.ny == lhs.ny && rhs.nz == lhs.nz);
+}
+
+scalar_field scalar_field_allocate(fint nx, fint ny, fint nz) {
+    scalar_field field;
+    field.nx = nx;
+    field.ny = ny;
+    field.nz = nz;
+    
+    field.data = malloc(sizeof(T) * nx * ny * nz);
+    field.grid = malloc(sizeof(T **) * nx);
+    for (fint ix = 0; ix < nx; ++ix) {
+        field.grid[ix] = malloc(sizeof(T *) * ny);
+        for (fint iy = 0; iy < ny; ++iy) {
+            field.grid[ix][iy] = field.data + nz * (iy + ny * ix);
+        }
+    }
+    return field;
+}
+
+void scalar_field_deallocate(scalar_field field) {
+    free(field.data);
+    for (fint ix = 0; ix < field.nx; ++ix) {
+        free(field.grid[ix]);
+    }
+    free(field.grid);
+}
+
+T scalar_field_compute_max(scalar_field field) {
+#ifdef DBL
+    T max = DBL_MIN;
+#else
+    T max = FLT_MIN;
+#endif
+    for (fint ix = 0; ix < field.nx; ++ix) {
+		for (fint iy = 0; iy < field.ny; ++iy){
+    		for (fint iz = 0; iz < field.nz; ++iz){
+                if (field.grid[ix][iy][iz] > max) {
+                    max = field.grid[ix][iy][iz];
+                }
+            }
+        }
+    }
+    return max;
+}
+
+T scalar_field_compute_min(scalar_field field) {
+#ifdef DBL
+    T min = DBL_MAX;
+#else
+    T min = FLT_MAX;
+#endif
+    for(int ix = 0; ix < field.nx; ++ix) {
+		for(int iy = 0; iy < field.ny; ++iy){
+    		for (fint iz = 0; iz < field.nz; ++iz){
+                if (field.grid[ix][iy][iz] < min) {
+                    min = field.grid[ix][iy][iz];
+                }
+            }
+        }
+    }
+    return min;
+}
+
+scalar_field_2d scalar_field_extract_2d_x(scalar_field field, int ix) {
+    scalar_field_2d field_2d = scalar_field_2d_allocate(field.ny, field.nz);
+
+    for (fint iy = 0; iy < field.ny; ++iy) {
+        for (fint iz = 0; iz < field.nz; ++iz) {
+            field_2d.grid[ix][iy] = field.grid[ix][iy][iz];
+        }
+    }
+    return field_2d;
+}
+
+scalar_field_2d scalar_field_extract_2d_y(scalar_field field, int iy) {
+    scalar_field_2d field_2d = scalar_field_2d_allocate(field.nx, field.nz);
+
+    for (fint ix = 0; ix < field.nx; ++ix) {
+        for (fint iz = 0; iz < field.nz; ++iz) {
+            field_2d.grid[ix][iy] = field.grid[ix][iy][iz];
+        }
+    }
+    return field_2d;
+}
+
+scalar_field_2d scalar_field_extract_2d_z(scalar_field field, int iz) {
+    scalar_field_2d field_2d = scalar_field_2d_allocate(field.nx, field.ny);
+    
+    for (fint ix = 0; ix < field.nx; ++ix) {
+        for (fint iy = 0; iy < field.ny; ++iy) {
+            field_2d.grid[ix][iy] = field.grid[ix][iy][iz];
+        }
+    }
+    return field_2d;
+}
+
+/**
+ * Ecriture du tableau des vitesses dans un fichier
+ **/
+void scalar_field_write_file(scalar_field field, char *dir, char *prefix, int i) {
+    // Create filename
+    char integer_string[128];
+    sprintf(integer_string, "%s%s%d", dir, prefix, i);
+    assert(strlen(integer_string) < 128);
+    char *filename = strcat(integer_string, ".dat");
+
+    // Open file
+    FILE *fp;
+    if ((fp = fopen(filename, "w")) == NULL) {
+        printf("Error ! opening file");
+        exit(EXIT_FAILURE);
+    }
+
+    // Fill file
+    for (fint x = 0; x < field.nx; x++) {
+        for (fint y = 0; y < field.ny; y++) {
+            for (fint z = 0; z < field.nz; z++) {
+                fprintf(fp, "%f ", field.grid[x][y][z]);
+            }
+        }
+    }
+
+    fclose(fp);
+}
diff --git a/presentations/pasc/code/c_interface/scalar_field.h b/presentations/pasc/code/c_interface/scalar_field.h
new file mode 100644
index 0000000000000000000000000000000000000000..813847778485aba9535a81a0ffa56907a85b6a0a
--- /dev/null
+++ b/presentations/pasc/code/c_interface/scalar_field.h
@@ -0,0 +1,32 @@
+#ifndef SCALAR_FIELD_H
+#define SCALAR_FIELD_H
+
+#include <stdbool.h>
+#include "globals.h"
+#include "scalar_field_2d.h"
+
+typedef struct {
+    T *data;
+    T ***grid;
+    fint nx, ny, nz;
+} scalar_field;
+
+bool scalar_field_consistant_size_scalar_field(scalar_field rhs, scalar_field lhs);
+
+scalar_field scalar_field_allocate(fint nx, fint ny, fint nz);
+
+void scalar_field_deallocate(scalar_field field);
+
+T scalar_field_compute_min(scalar_field field);
+
+T scalar_field_compute_max(scalar_field field);
+
+void scalar_field_write_file(scalar_field field, char *dir, char *prefix, int i);
+
+scalar_field_2d scalar_field_extract_2d_x(scalar_field field, int ix);
+
+scalar_field_2d scalar_field_extract_2d_y(scalar_field field, int iy);
+
+scalar_field_2d scalar_field_extract_2d_z(scalar_field field, int iz);
+
+#endif
\ No newline at end of file
diff --git a/presentations/pasc/code/c_interface/scalar_field_2d.c b/presentations/pasc/code/c_interface/scalar_field_2d.c
new file mode 100644
index 0000000000000000000000000000000000000000..be51f050c84d8adf5c41addf0d22c2f3ffeee6d3
--- /dev/null
+++ b/presentations/pasc/code/c_interface/scalar_field_2d.c
@@ -0,0 +1,83 @@
+#include "scalar_field_2d.h"
+#include <stdlib.h>
+#include <string.h>
+#include <float.h>
+#include <stdio.h>
+#include <assert.h>
+
+scalar_field_2d scalar_field_2d_allocate(fint nx, fint ny) {
+    scalar_field_2d field;
+    field.nx = nx;
+    field.ny = ny;
+    
+    field.data = malloc(sizeof(T) * nx * ny);
+    field.grid = malloc(sizeof(T *) * nx);
+    for (fint ix = 0; ix < nx; ++ix) {
+        field.grid[ix] = field.data + ny * ix;
+    }
+    return field;
+}
+
+void scalar_field_2d_deallocate(scalar_field_2d field) {
+    free(field.data);
+    free(field.grid);
+}
+
+T scalar_field_2d_compute_max(scalar_field_2d field) {
+#ifdef DBL
+    T max = DBL_MIN;
+#else
+    T max = FLT_MIN;
+#endif
+    for (fint ix = 0; ix < field.nx; ++ix) {
+		for (fint iy = 0; iy < field.ny; ++iy){
+            if (field.grid[ix][iy] > max) {
+                max = field.grid[ix][iy];
+            }
+        }
+    }
+    return max;
+}
+
+T scalar_field_2d_compute_min(scalar_field_2d field) {
+#ifdef DBL
+    T min = DBL_MAX;
+#else
+    T min = FLT_MAX;
+#endif
+    for(int ix = 0; ix < field.nx; ++ix) {
+		for(int iy = 0; iy < field.ny; ++iy){
+            if (field.grid[ix][iy] < min) {
+                min = field.grid[ix][iy];
+            }
+        }
+    }
+    return min;
+}
+
+/**
+ * Ecriture du tableau des vitesses dans un fichier
+ **/
+void scalar_field_2d_write_file(scalar_field_2d field, char *dir, char *prefix, int i) {
+    // Create filename
+    char integer_string[128];
+    sprintf(integer_string, "%s%s%d", dir, prefix, i);
+    assert(strlen(integer_string) < 128);
+    char *filename = strcat(integer_string, ".dat");
+
+    // Open file
+    FILE *fp;
+    if ((fp = fopen(filename, "w")) == NULL) {
+        printf("Error ! opening file");
+        exit(EXIT_FAILURE);
+    }
+
+    // Fill file
+    for (fint ix = 0; ix < field.nx; ix++) {
+        for (fint iy = 0; iy < field.ny; iy++) {
+            fprintf(fp, "%f ", field.grid[ix][iy]);
+        }
+    }
+
+    fclose(fp);
+}
diff --git a/presentations/pasc/code/c_interface/scalar_field_2d.h b/presentations/pasc/code/c_interface/scalar_field_2d.h
new file mode 100644
index 0000000000000000000000000000000000000000..0ab22c5e5a430c7385bda192e8c2a8a381d42b01
--- /dev/null
+++ b/presentations/pasc/code/c_interface/scalar_field_2d.h
@@ -0,0 +1,22 @@
+#ifndef SCALAR_FIELD_2D_H
+#define SCALAR_FIELD_2D_H
+
+#include "globals.h"
+
+typedef struct {
+    T *data;
+    T **grid;
+    fint nx, ny;
+} scalar_field_2d;
+
+scalar_field_2d scalar_field_2d_allocate(fint nx, fint ny);
+
+void scalar_field_2d_deallocate(scalar_field_2d field);
+
+T scalar_field_2d_compute_min(scalar_field_2d field);
+
+T scalar_field_2d_compute_max(scalar_field_2d field);
+
+void scalar_field_2d_write_file(scalar_field_2d field, char *dir, char *prefix, int i);
+
+#endif
\ No newline at end of file
diff --git a/presentations/pasc/code/c_interface/tensor_field.c b/presentations/pasc/code/c_interface/tensor_field.c
new file mode 100644
index 0000000000000000000000000000000000000000..1bfd6bcacb225fad95986fd94d5d7a9580abb61a
--- /dev/null
+++ b/presentations/pasc/code/c_interface/tensor_field.c
@@ -0,0 +1,113 @@
+#include "tensor_field.h"
+#include <math.h>
+#include <stdlib.h>
+#include <assert.h>
+
+bool tensor_field_consistant_spatial_size_tensor_field(tensor_field lhs, tensor_field rhs) {
+    return (rhs.nx == lhs.nx && rhs.ny == lhs.ny && rhs.nz == lhs.nz);
+}
+
+bool tensor_field_consistant_size_tensor_field(tensor_field lhs, tensor_field rhs) {
+    return (rhs.nx == lhs.nx && rhs.ny == lhs.ny && rhs.nz == lhs.nz && rhs.q == lhs.q);
+}
+
+bool tensor_field_consistant_size_scalar_field(tensor_field lhs, scalar_field rhs) {
+    return (rhs.nx == lhs.nx && rhs.ny == lhs.ny && rhs.nz == lhs.nz);
+}
+
+tensor_field tensor_field_allocate(fint nx, fint ny, fint nz, fint q) {
+    tensor_field field;
+    field.nx = nx;
+    field.ny = ny;
+    field.nz = nz;
+    field.q  = q;
+    field.dim = 3;
+    
+    field.data = malloc(sizeof(T) * nx * ny * nz * q);
+    field.grid = malloc(sizeof(T ***) * nx);
+    for (fint ix = 0; ix < nx; ++ix) {
+        field.grid[ix] = malloc(sizeof(T **) * ny);
+        for (fint iy = 0; iy < ny; ++iy) {
+            field.grid[ix][iy] = malloc(sizeof(T *) * nz);
+            for (fint iz = 0; iz < nz; ++iz) {
+                field.grid[ix][iy][iz] = field.data + q * (iz + nz * (iy + ny * ix));
+            }
+        }
+    }
+    return field;
+}
+
+void tensor_field_deallocate(tensor_field field) {
+    free(field.data);
+
+    for (fint ix = 0; ix < field.nx; ++ix) {
+        for (fint iy = 0; iy < field.ny; ++iy) {
+            free(field.grid[ix][iy]);
+        }
+        free(field.grid[ix]);
+    }
+    free(field.grid);
+}
+
+void tensor_field_from_futhark_f32_4d_inplace(struct futhark_f32_4d *f, struct futhark_context *ctx, tensor_field field) {
+    int64_t *shape = futhark_shape_f32_4d(ctx, f);
+    fint nx = shape[0];
+    fint ny = shape[1];
+    fint nz = shape[2];
+    fint q = shape[3];
+
+    assert(nx == field.nx);
+    assert(ny == field.ny);
+    assert(nz == field.nz);
+    assert(q == field.q);
+
+    futhark_values_f32_4d(ctx, f, field.data);
+}
+
+void tensor_field_compute_norm_inplace(tensor_field field, scalar_field norm) {
+    assert(norm.nx == field.nx);
+    assert(norm.ny == field.ny);
+    assert(norm.nz == field.nz);
+
+    for (int ix = 0; ix < field.nx; ++ix) {
+        for (int iy = 0; iy < field.ny; ++iy) {
+            for (int iz = 0; iz < field.nz; ++iz) {
+                T n = 0.0;
+                for (int id = 0; id < field.q; ++id) {
+                    n += field.grid[ix][iy][iz][id] * field.grid[ix][iy][iz][id];
+                }
+                norm.grid[ix][iy][iz] = sqrt(n);
+            }
+        }
+    }
+}
+
+tensor_field tensor_field_from_futhark_f32_4d(struct futhark_f32_4d *f, struct futhark_context *ctx) {
+    int64_t *shape = futhark_shape_f32_4d(ctx, f);
+    fint nx = shape[0];
+    fint ny = shape[1];
+    fint nz = shape[2];
+    fint q = shape[3];
+
+    tensor_field field = tensor_field_allocate(nx, ny, nz, q);
+
+    tensor_field_from_futhark_f32_4d_inplace(f, ctx, field);
+
+    return field;
+}
+
+struct futhark_f32_4d *tensor_field_to_futhark_f32_4d(tensor_field field, struct futhark_context *ctx) {
+    
+    struct futhark_f32_4d *f = futhark_new_f32_4d(ctx,
+                                          field.data, field.nx, field.ny, field.nz, field.q);
+    return f;
+}
+
+scalar_field tensor_field_compute_norm(tensor_field field) {
+    scalar_field norm = scalar_field_allocate(field.nx, field.ny, field.nz);
+
+    tensor_field_compute_norm_inplace(field, norm);
+
+    return norm;
+}
+
diff --git a/presentations/pasc/code/c_interface/tensor_field.h b/presentations/pasc/code/c_interface/tensor_field.h
new file mode 100644
index 0000000000000000000000000000000000000000..1cce5b8a98b09f9edaecee6cddcc9b06f84b6684
--- /dev/null
+++ b/presentations/pasc/code/c_interface/tensor_field.h
@@ -0,0 +1,33 @@
+#ifndef TENSOR_FIELD_H
+#define TENSOR_FIELD_H
+
+#include "globals.h"
+#include "scalar_field.h"
+#include "../liblbm.h"
+
+typedef struct {
+    T *data;
+    T ****grid;
+    fint nx, ny, nz, q;
+    fint dim;
+} tensor_field;
+
+bool tensor_field_consistant_spatial_size_tensor_field(tensor_field lhs, tensor_field rhs);
+bool tensor_field_consistant_size_tensor_field(tensor_field lhs, tensor_field rhs);
+bool tensor_field_consistant_size_scalar_field(tensor_field lhs, scalar_field rhs);
+
+tensor_field tensor_field_allocate(fint nx, fint ny, fint nz, fint q);
+
+void tensor_field_deallocate(tensor_field field);
+
+void tensor_field_from_futhark_f32_4d_inplace(struct futhark_f32_4d *f_in, struct futhark_context *ctx, tensor_field field);
+void tensor_field_compute_norm_inplace(tensor_field field, scalar_field norm);
+
+tensor_field tensor_field_from_futhark_f32_4d(struct futhark_f32_4d *f_in, struct futhark_context *ctx);
+struct futhark_f32_4d *tensor_field_to_futhark_f32_4d(tensor_field field, struct futhark_context *ctx);
+
+scalar_field tensor_field_compute_norm(tensor_field field);
+
+
+
+#endif
\ No newline at end of file
diff --git a/presentations/pasc/code/c_interface/tensor_field_2d.c b/presentations/pasc/code/c_interface/tensor_field_2d.c
new file mode 100644
index 0000000000000000000000000000000000000000..da1a802845c6daba809fc0f7c6a9076f08a38eb3
--- /dev/null
+++ b/presentations/pasc/code/c_interface/tensor_field_2d.c
@@ -0,0 +1,48 @@
+#include "tensor_field_2d.h"
+#include <math.h>
+#include <stdlib.h>
+
+tensor_field_2d tensor_field_2d_allocate(fint nx, fint ny, fint q) {
+    tensor_field_2d field;
+    field.nx = nx;
+    field.ny = ny;
+    field.q  = q;
+    field.dim = 2;
+    
+    field.data = malloc(sizeof(T) * nx * ny * q);
+    field.grid = malloc(sizeof(T **) * nx);
+    for (fint ix = 0; ix < nx; ++ix) {
+        field.grid[ix] = malloc(sizeof(T *) * ny);
+        for (fint iy = 0; iy < ny; ++iy) {
+            field.grid[ix][iy] = field.data + q * (iy + ny * ix);
+        }
+    }
+    return field;
+}
+
+void tensor_field_2d_deallocate(tensor_field_2d field) {
+    free(field.data);
+
+    for (fint ix = 0; ix < field.nx; ++ix) {
+        for (fint iy = 0; iy < field.ny; ++iy) {
+            free(field.grid[ix][iy]);
+        }
+        free(field.grid[ix]);
+    }
+}
+
+scalar_field_2d tensor_field_2d_compute_norm(tensor_field_2d field) {
+    scalar_field_2d norm = scalar_field_2d_allocate(field.nx, field.ny);
+
+    for (int ix = 0; ix < field.nx; ++ix) {
+        for (int iy = 0; iy < field.ny; ++iy) {
+            T n = 0.0;
+            for (int id = 0; id < field.q; ++id) {
+                n += field.grid[ix][iy][id] * field.grid[ix][iy][id];
+            }
+            norm.grid[ix][iy] = sqrt(n);
+        }
+    }
+    return norm;
+}
+
diff --git a/presentations/pasc/code/c_interface/tensor_field_2d.h b/presentations/pasc/code/c_interface/tensor_field_2d.h
new file mode 100644
index 0000000000000000000000000000000000000000..86f3857ac9a46bcde21e1f7e1fc48bd689804c18
--- /dev/null
+++ b/presentations/pasc/code/c_interface/tensor_field_2d.h
@@ -0,0 +1,22 @@
+#ifndef TENSOR_FIELD_2D_H
+#define TENSOR_FIELD_2D_H
+
+#include "globals.h"
+#include "scalar_field_2d.h"
+#include "../liblbm.h"
+
+typedef struct {
+    T *data;
+    T ***grid;
+    fint nx, ny, q;
+    fint dim;
+} tensor_field_2d;
+
+tensor_field_2d tensor_field_2d_allocate(fint nx, fint ny, fint q);
+
+void tensor_field_2d_deallocate(tensor_field_2d field);
+
+scalar_field_2d tensor_field_2d_compute_norm(tensor_field_2d field);
+
+
+#endif
\ No newline at end of file
diff --git a/presentations/pasc/code/c_interface/units.c b/presentations/pasc/code/c_interface/units.c
new file mode 100644
index 0000000000000000000000000000000000000000..dc04fff385cb2003174f6a4765c6c80106c7929f
--- /dev/null
+++ b/presentations/pasc/code/c_interface/units.c
@@ -0,0 +1,53 @@
+#include <assert.h>
+#include <stdio.h>
+#include "units.h"
+#include "utils.h"
+#include "lbm_utils.h"
+
+units_converter units_converter_create_inertial(T Re, T l_phys, fint N, T u_phys, T u_lb, T lx, T ly, T lz) {
+    units_converter units;
+
+    // imposed quantities
+    units.Re = Re;
+    units.l_phys = l_phys;
+    units.N = N;
+    units.u_phys = u_phys;
+    units.u_lb = u_lb;
+    units.lx = lx;
+    units.ly = ly;
+    units.lz = lz;
+    
+    // derived quantities phys
+    units.nu_phys = units.u_phys * units.l_phys / units.Re;
+
+
+    // derived quantities lb
+    units.delta_x = units.l_phys / N;
+    units.delta_t = units.delta_x * units.u_lb / units.u_phys;
+    units.nu_lb = units.u_lb * units.N / units.Re;
+    assert(is_approx_equal(units.nu_lb * sqr_f(units.delta_x) / units.delta_t, units.nu_phys, 1.0e-7));
+    T tau = units.nu_lb / cs2 + 0.5;
+    units.omega = (T)1 / tau;
+
+
+    units.nx = (fint)(round_to_int(units.lx * N) + 1);
+    units.ny = (fint)(round_to_int(units.ly * N)+ 1);
+    units.nz = (fint)(round_to_int(units.lz * N) + 1);
+
+    return units;
+}
+
+void units_converter_print(units_converter units) {
+    printf("==========================================\n");
+    printf("             Parameters                   \n");
+    printf("==========================================\n");
+    printf("\n");
+    printf("Re = %f\n", units.Re);
+    printf("l_phys = %f, u_phys = %f, nu_phys = %f\n", units.l_phys, units.u_phys, units.nu_phys);
+    printf("N = %d\n", units.N);
+    printf("u_lb = %f, nu_lb = %f\n", units.u_lb, units.nu_lb);
+    printf("omega = %f\n", units.omega);
+    printf("lx = %f, ly = %f, lz = %f\n", units.lx, units.ly, units.lz);
+    printf("delta_x = %f, delta_t = %f\n", units.delta_x, units.delta_t);
+    printf("nx = %d, ny = %d, nz = %d\n", units.nx, units.ny, units.nz);
+}
diff --git a/presentations/pasc/code/c_interface/units.h b/presentations/pasc/code/c_interface/units.h
new file mode 100644
index 0000000000000000000000000000000000000000..13f2f7526e7f5e756bce262d0918e62e517e17f3
--- /dev/null
+++ b/presentations/pasc/code/c_interface/units.h
@@ -0,0 +1,21 @@
+#ifndef UNITS_H
+#define UNITS_H
+
+#include "globals.h"
+
+typedef struct {
+    T Re;
+    T l_phys, u_phys, nu_phys;
+    fint N;
+    T u_lb, nu_lb;
+    T omega;
+    T lx, ly, lz;
+    T delta_x, delta_t;
+    fint nx, ny, nz;
+} units_converter;
+
+units_converter units_converter_create_inertial(T Re, T l_phys, fint N, T u_phys, T u_lb, T lx, T ly, T lz);
+
+void units_converter_print(units_converter units);
+
+#endif
\ No newline at end of file
diff --git a/presentations/pasc/code/c_interface/utils.c b/presentations/pasc/code/c_interface/utils.c
new file mode 100644
index 0000000000000000000000000000000000000000..14cb99850de8f5c31643ceb823a31a5c2f9957ee
--- /dev/null
+++ b/presentations/pasc/code/c_interface/utils.c
@@ -0,0 +1,41 @@
+#include <dirent.h>
+#include <errno.h>
+#include <assert.h>
+#include "utils.h"
+
+fint round_to_int(T a) {
+    return (fint)(a + 0.5);
+}
+
+
+T sqr_f(T a) {
+    return a * a;
+}
+
+fint sqr_i(fint a) {
+    return a * a;
+}
+
+fint max(fint a, fint b) {
+    if (a > b) {
+        return a;
+    }
+    return b;
+}
+
+bool is_approx_equal(T a, T b, T eps) {
+    return (fabs(a - b) < eps);
+}
+
+void check_dir(const char * const prefix) {
+    DIR* dir = opendir(prefix);
+    if (dir) {
+        /* Directory exists. */
+        closedir(dir);
+    } else if (ENOENT == errno) {
+        assert(false && "Error directory does not exist.");
+    } else {
+        assert(false && "Error opendir failed.");
+    }
+}
+
diff --git a/presentations/pasc/code/c_interface/utils.h b/presentations/pasc/code/c_interface/utils.h
new file mode 100644
index 0000000000000000000000000000000000000000..a3178d4f00b7a542c09997410fdd748d45d66db3
--- /dev/null
+++ b/presentations/pasc/code/c_interface/utils.h
@@ -0,0 +1,19 @@
+#ifndef UTILS_H
+#define UTILS_H
+
+#include "globals.h"
+#include <stdbool.h>
+
+fint round_to_int(T a);
+
+T sqr_f(T a);
+
+fint sqr_i(fint a);
+
+fint max(fint a, fint b);
+
+bool is_approx_equal(T a, T b, T eps);
+
+void check_dir(const char *const prefix);
+
+#endif
diff --git a/presentations/pasc/code/c_interface/vtk.c b/presentations/pasc/code/c_interface/vtk.c
new file mode 100644
index 0000000000000000000000000000000000000000..db0913c0bca718f6cf90d58cdf7e712a35bfd455
--- /dev/null
+++ b/presentations/pasc/code/c_interface/vtk.c
@@ -0,0 +1,192 @@
+#include <stdbool.h>
+#include <stdio.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include "box.h"
+#include "point.h"
+#include "vtk.h"
+#include "scalar_field.h"
+#include "tensor_field.h"
+
+static void write_header(box domain, point origin, double delta_x, bool point_data, FILE *fp);
+static void start_piece(box domain, bool point_data, FILE *fp);
+static void end_piece(bool point_data, FILE *fp);
+static void write_footer(FILE *fp);
+static void vtk_file_write_header(vtk_file *vtk);
+static void write_scalar_field(scalar_field field, char *name, FILE *fp);
+static void write_tensor_field(tensor_field field, char *name, FILE *fp);
+static char *get_float_type();
+
+char *vtk_create_fname(char const * const prefix, char *name, fint i) {
+    char *buffer = malloc(80 * sizeof(char));
+    sprintf(buffer, "%s%s_%08d.vti", prefix, name, i);
+    assert(strlen(buffer) < 80 && "Filename too long.");
+    return buffer;
+}
+
+
+vtk_file vtk_file_create(char *fname, box domain, point origin, T delta_x, bool point_data) {
+    vtk_file vtk;
+    vtk.domain = domain;
+    vtk.origin = origin;
+    vtk.delta_x = delta_x;
+    vtk.point_data = point_data;
+    vtk.header_written = false;
+
+    FILE *file = fopen (fname,"w");
+    assert(file != 0 && "Could not open file.");
+    vtk.fp = file;
+
+    return vtk;
+}
+
+vtk_file vtk_file_create_default(char *fname, box domain, T delta_x) {
+    point offset = point_create_default();
+    return vtk_file_create(fname, domain, offset, delta_x, true);
+}
+
+void vtk_file_write_scalar_field(vtk_file *vtk, scalar_field field, char *name) {
+    vtk_file_write_header(vtk);
+    write_scalar_field(field, name, vtk->fp);
+}
+
+void vtk_file_write_tensor_field(vtk_file *vtk, tensor_field field, char *name) {
+    vtk_file_write_header(vtk);
+    write_tensor_field(field, name, vtk->fp);
+}
+
+void vtk_file_close(vtk_file *vtk) {
+    assert(vtk->header_written && "Trying to close a file where nothing has been written");
+    end_piece(vtk->point_data, vtk->fp);
+    write_footer(vtk->fp);
+    fint is_closed = fclose(vtk->fp);
+    assert(is_closed == 0 && "Error file not closed properly.");
+    vtk->header_written = false;
+}
+
+static void vtk_file_write_header(vtk_file *vtk) {
+    if (!vtk->header_written) {
+        write_header(vtk->domain, vtk->origin, vtk->delta_x, vtk->point_data, vtk->fp);
+        start_piece(vtk->domain, vtk->point_data, vtk->fp);
+        vtk->header_written = true;
+    }
+}
+
+static void write_scalar_field(scalar_field field, char *name, FILE *fp) {
+    assert(fp != 0 && "File not open.");
+    
+    char *flt = get_float_type();
+    assert(flt != NULL && "Not a float");
+
+    fprintf(fp, "<DataArray type=\"%s\" Name=\"%s\" format=\"ascii\" NumberOfComponents=\"%d\">\n", flt, name, 1);
+    for (fint ix = 0; ix < field.nx; ++ix) {
+        for (fint iy = 0; iy < field.ny; ++iy) {
+            for (fint iz = 0; iz < field.nz; ++iz) {
+                // fprintf(fp, "%f ", field.grid[ix][iy][iz]);
+                
+                if (strcmp(flt, "Float32") == 0) {
+                        fprintf(fp, "%.9g ", field.grid[ix][iy][iz]);
+                } else if (strcmp(flt, "Float64") == 0) {
+                    fprintf(fp, "%.17g ", field.grid[ix][iy][iz]);
+                } else {
+                    assert(false && "Error with the flt_type.");
+                }
+            }
+        }
+    }
+    fprintf(fp, "\n</DataArray>\n");
+}
+
+static void write_tensor_field(tensor_field field, char *name, FILE *fp) {
+    assert(fp != 0 && "File not open.");
+
+    char *flt = get_float_type();
+    assert(flt != NULL && "Not a float");
+
+    fprintf(fp, "<DataArray type=\"%s\" Name=\"%s\" format=\"ascii\" NumberOfComponents=\"%d\">\n", flt, name, field.q);
+    for (fint ix = 0; ix < field.nx; ++ix) {
+        for (fint iy = 0; iy < field.ny; ++iy) {
+            for (fint iz = 0; iz < field.nz; ++iz) {
+                for (fint iq = 0; iq < field.q; ++iq) {
+                    // fprintf(fp, "%f ", field.grid[ix][iy][iz][iq]);
+                    
+                    if (strcmp(flt, "Float32") == 0) {
+                        fprintf(fp, "%.9g ", field.grid[ix][iy][iz][iq]);
+                    } else if (strcmp(flt, "Float64") == 0) {
+                        fprintf(fp, "%.17g ", field.grid[ix][iy][iz][iq]);
+                    } else {
+                        assert(false && "Error with the flt_type.");
+                    }
+                }
+            }
+        }
+    }
+    fprintf(fp, "\n</DataArray>\n");
+}
+
+static void write_header(box domain, point origin, double delta_x, bool point_data, FILE *fp) {
+    assert(fp != 0 && "File not open.");
+
+    fprintf(fp, "<?xml version=\"1.0\"?>\n");
+    fprintf(fp, "<VTKFile type=\"ImageData\" version=\"0.1\" byte_order=\"LittleEndian\">\n");
+    if (!point_data) {
+        origin.x -= delta_x/2.;
+        origin.y -= delta_x/2.;
+        origin.z -= delta_x/2.;
+        domain.x1++;
+        domain.y1++;
+        domain.z1++;
+    }
+
+    fprintf(fp, "<ImageData WholeExtent=\"%d %d %d %d %d %d\" ", domain.x0, domain.x1, domain.y0, domain.y1, domain.z0, domain.z1);
+    fprintf(fp, "Origin=\"%f %f %f\" ", origin.x, origin.y, origin.z);
+    fprintf(fp, "Spacing=\"%f %f %f\">\n", delta_x, delta_x, delta_x);
+}
+
+static void start_piece(box domain, bool point_data, FILE *fp) {
+    assert(fp != 0 && "File not open.");
+
+    if (!point_data) {
+        domain.x1++;
+        domain.y1++;
+        domain.z1++;
+    }
+    fprintf(fp, "<Piece Extent=\"%d %d %d %d %d %d\">\n", domain.x0, domain.x1, domain.y0, domain.y1, domain.z0, domain.z1);
+
+    if (point_data) {
+        fprintf(fp, "<PointData>\n");
+    } else {
+        fprintf(fp, "<CellData>\n");
+    }
+}
+
+static void end_piece(bool point_data, FILE *fp) {
+    assert(fp != 0 && "File not open.");
+
+    if (point_data) {
+        fprintf(fp, "</PointData>\n");
+    }
+    else {
+        fprintf(fp, "</CellData>\n");
+    }
+    fprintf(fp, "</Piece>\n");
+}
+
+static void write_footer(FILE *fp) {
+    assert(fp != 0 && "File not open.");
+
+    fprintf(fp, "</ImageData>\n");
+    fprintf(fp, "</VTKFile>\n");
+}
+
+static char *get_float_type() {
+    if (8 == sizeof(T)) {
+        return "Float64";
+    } else if (4 == sizeof(T)) {
+        return "Float32";
+    } else {
+        return NULL;
+    }
+}
+
diff --git a/presentations/pasc/code/c_interface/vtk.h b/presentations/pasc/code/c_interface/vtk.h
new file mode 100644
index 0000000000000000000000000000000000000000..f26fbe0b83e2b7e6908f3bbe6d6bcd599c654345
--- /dev/null
+++ b/presentations/pasc/code/c_interface/vtk.h
@@ -0,0 +1,32 @@
+#ifndef VTK_H
+#define VTK_H
+
+#include <stdio.h>
+#include <stdbool.h>
+#include "box.h"
+#include "point.h"
+#include "scalar_field.h"
+#include "tensor_field.h"
+
+typedef struct {
+    FILE *fp;
+    box domain;
+    point origin;
+    T delta_x;
+    bool point_data;
+    bool header_written;
+} vtk_file;
+
+vtk_file vtk_file_create(char *fname, box domain, point origin, T delta_x, bool point_data);
+
+vtk_file vtk_file_create_default(char *fname, box domain, T delta_x);
+
+void vtk_file_write_scalar_field(vtk_file *vtk, scalar_field field, char *name);
+
+void vtk_file_write_tensor_field(vtk_file *vtk, tensor_field field, char *name);
+
+void vtk_file_close(vtk_file *vtk);
+
+char *vtk_create_fname(char const * const prefix, char *name, fint i);
+
+#endif
\ No newline at end of file
diff --git a/presentations/pasc/code/data_analysis.c b/presentations/pasc/code/data_analysis.c
new file mode 100644
index 0000000000000000000000000000000000000000..283350a3603da835bb44593014a5c139d92dcf1e
--- /dev/null
+++ b/presentations/pasc/code/data_analysis.c
@@ -0,0 +1,2964 @@
+#ifdef __GNUC__
+#pragma GCC diagnostic ignored "-Wunused-function"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wparentheses"
+#pragma GCC diagnostic ignored "-Wunused-label"
+#endif
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wunused-function"
+#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wparentheses"
+#pragma clang diagnostic ignored "-Wunused-label"
+#endif
+// Headers
+
+#define _GNU_SOURCE
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <float.h>
+
+
+// Initialisation
+
+struct futhark_context_config ;
+struct futhark_context_config *futhark_context_config_new(void);
+void futhark_context_config_free(struct futhark_context_config *cfg);
+void futhark_context_config_set_debugging(struct futhark_context_config *cfg,
+                                          int flag);
+void futhark_context_config_set_logging(struct futhark_context_config *cfg,
+                                        int flag);
+struct futhark_context ;
+struct futhark_context *futhark_context_new(struct futhark_context_config *cfg);
+void futhark_context_free(struct futhark_context *ctx);
+
+// Arrays
+
+
+// Opaque values
+
+
+// Entry points
+
+
+// Miscellaneous
+
+int futhark_context_sync(struct futhark_context *ctx);
+int futhark_context_clear_caches(struct futhark_context *ctx);
+char *futhark_context_report(struct futhark_context *ctx);
+char *futhark_context_get_error(struct futhark_context *ctx);
+void futhark_context_pause_profiling(struct futhark_context *ctx);
+void futhark_context_unpause_profiling(struct futhark_context *ctx);
+#define FUTHARK_BACKEND_c
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <math.h>
+#include <stdint.h>
+#undef NDEBUG
+#include <assert.h>
+#include <stdarg.h>
+// Start of util.h.
+//
+// Various helper functions that are useful in all generated C code.
+
+#include <errno.h>
+#include <string.h>
+
+static const char *fut_progname = "(embedded Futhark)";
+
+static void futhark_panic(int eval, const char *fmt, ...) {
+  va_list ap;
+  va_start(ap, fmt);
+  fprintf(stderr, "%s: ", fut_progname);
+  vfprintf(stderr, fmt, ap);
+  va_end(ap);
+  exit(eval);
+}
+
+// For generating arbitrary-sized error messages.  It is the callers
+// responsibility to free the buffer at some point.
+static char* msgprintf(const char *s, ...) {
+  va_list vl;
+  va_start(vl, s);
+  size_t needed = 1 + (size_t)vsnprintf(NULL, 0, s, vl);
+  char *buffer = (char*) malloc(needed);
+  va_start(vl, s); // Must re-init.
+  vsnprintf(buffer, needed, s, vl);
+  return buffer;
+}
+
+
+static inline void check_err(int errval, int sets_errno, const char *fun, int line,
+                            const char *msg, ...) {
+  if (errval) {
+    char errnum[10];
+
+    va_list vl;
+    va_start(vl, msg);
+
+    fprintf(stderr, "ERROR: ");
+    vfprintf(stderr, msg, vl);
+    fprintf(stderr, " in %s() at line %d with error code %s\n",
+            fun, line,
+            sets_errno ? strerror(errno) : errnum);
+    exit(errval);
+  }
+}
+
+#define CHECK_ERR(err, msg...) check_err(err, 0, __func__, __LINE__, msg)
+#define CHECK_ERRNO(err, msg...) check_err(err, 1, __func__, __LINE__, msg)
+
+// Read a file into a NUL-terminated string; returns NULL on error.
+static void* slurp_file(const char *filename, size_t *size) {
+  unsigned char *s;
+  FILE *f = fopen(filename, "rb"); // To avoid Windows messing with linebreaks.
+  if (f == NULL) return NULL;
+  fseek(f, 0, SEEK_END);
+  size_t src_size = ftell(f);
+  fseek(f, 0, SEEK_SET);
+  s = (unsigned char*) malloc(src_size + 1);
+  if (fread(s, 1, src_size, f) != src_size) {
+    free(s);
+    s = NULL;
+  } else {
+    s[src_size] = '\0';
+  }
+  fclose(f);
+
+  if (size) {
+    *size = src_size;
+  }
+
+  return s;
+}
+
+// Dump 'n' bytes from 'buf' into the file at the designated location.
+// Returns 0 on success.
+static int dump_file(const char *file, const void *buf, size_t n) {
+  FILE *f = fopen(file, "w");
+
+  if (f == NULL) {
+    return 1;
+  }
+
+  if (fwrite(buf, sizeof(char), n, f) != n) {
+    return 1;
+  }
+
+  if (fclose(f) != 0) {
+    return 1;
+  }
+
+  return 0;
+}
+
+struct str_builder {
+  char *str;
+  size_t capacity; // Size of buffer.
+  size_t used; // Bytes used, *not* including final zero.
+};
+
+static void str_builder_init(struct str_builder *b) {
+  b->capacity = 10;
+  b->used = 0;
+  b->str = malloc(b->capacity);
+  b->str[0] = 0;
+}
+
+static void str_builder(struct str_builder *b, const char *s, ...) {
+  va_list vl;
+  va_start(vl, s);
+  size_t needed = (size_t)vsnprintf(NULL, 0, s, vl);
+
+  while (b->capacity < b->used + needed + 1) {
+    b->capacity *= 2;
+    b->str = realloc(b->str, b->capacity);
+  }
+
+  va_start(vl, s); // Must re-init.
+  vsnprintf(b->str+b->used, b->capacity-b->used, s, vl);
+  b->used += needed;
+}
+
+// End of util.h.
+
+// Start of timing.h.
+
+// The function get_wall_time() returns the wall time in microseconds
+// (with an unspecified offset).
+
+#ifdef _WIN32
+
+#include <windows.h>
+
+static int64_t get_wall_time(void) {
+  LARGE_INTEGER time,freq;
+  assert(QueryPerformanceFrequency(&freq));
+  assert(QueryPerformanceCounter(&time));
+  return ((double)time.QuadPart / freq.QuadPart) * 1000000;
+}
+
+#else
+// Assuming POSIX
+
+#include <time.h>
+#include <sys/time.h>
+
+static int64_t get_wall_time(void) {
+  struct timeval time;
+  assert(gettimeofday(&time,NULL) == 0);
+  return time.tv_sec * 1000000 + time.tv_usec;
+}
+
+static int64_t get_wall_time_ns(void) {
+  struct timespec time;
+  assert(clock_gettime(CLOCK_REALTIME, &time) == 0);
+  return time.tv_sec * 1000000000 + time.tv_nsec;
+}
+
+#endif
+
+// End of timing.h.
+
+#include <string.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+// Start of values.h.
+
+//// Text I/O
+
+typedef int (*writer)(FILE*, const void*);
+typedef int (*bin_reader)(void*);
+typedef int (*str_reader)(const char *, void*);
+
+struct array_reader {
+  char* elems;
+  int64_t n_elems_space;
+  int64_t elem_size;
+  int64_t n_elems_used;
+  int64_t *shape;
+  str_reader elem_reader;
+};
+
+static void skipspaces(FILE *f) {
+  int c;
+  do {
+    c = getc(f);
+  } while (isspace(c));
+
+  if (c != EOF) {
+    ungetc(c, f);
+  }
+}
+
+static int constituent(char c) {
+  return isalnum(c) || c == '.' || c == '-' || c == '+' || c == '_';
+}
+
+// Produces an empty token only on EOF.
+static void next_token(FILE *f, char *buf, int bufsize) {
+ start:
+  skipspaces(f);
+
+  int i = 0;
+  while (i < bufsize) {
+    int c = getc(f);
+    buf[i] = (char)c;
+
+    if (c == EOF) {
+      buf[i] = 0;
+      return;
+    } else if (c == '-' && i == 1 && buf[0] == '-') {
+      // Line comment, so skip to end of line and start over.
+      for (; c != '\n' && c != EOF; c = getc(f));
+      goto start;
+    } else if (!constituent((char)c)) {
+      if (i == 0) {
+        // We permit single-character tokens that are not
+        // constituents; this lets things like ']' and ',' be
+        // tokens.
+        buf[i+1] = 0;
+        return;
+      } else {
+        ungetc(c, f);
+        buf[i] = 0;
+        return;
+      }
+    }
+
+    i++;
+  }
+
+  buf[bufsize-1] = 0;
+}
+
+static int next_token_is(FILE *f, char *buf, int bufsize, const char* expected) {
+  next_token(f, buf, bufsize);
+  return strcmp(buf, expected) == 0;
+}
+
+static void remove_underscores(char *buf) {
+  char *w = buf;
+
+  for (char *r = buf; *r; r++) {
+    if (*r != '_') {
+      *w++ = *r;
+    }
+  }
+
+  *w++ = 0;
+}
+
+static int read_str_elem(char *buf, struct array_reader *reader) {
+  int ret;
+  if (reader->n_elems_used == reader->n_elems_space) {
+    reader->n_elems_space *= 2;
+    reader->elems = (char*) realloc(reader->elems,
+                                    (size_t)(reader->n_elems_space * reader->elem_size));
+  }
+
+  ret = reader->elem_reader(buf, reader->elems + reader->n_elems_used * reader->elem_size);
+
+  if (ret == 0) {
+    reader->n_elems_used++;
+  }
+
+  return ret;
+}
+
+static int read_str_array_elems(FILE *f,
+                                char *buf, int bufsize,
+                                struct array_reader *reader, int64_t dims) {
+  int ret;
+  int first = 1;
+  char *knows_dimsize = (char*) calloc((size_t)dims, sizeof(char));
+  int cur_dim = dims-1;
+  int64_t *elems_read_in_dim = (int64_t*) calloc((size_t)dims, sizeof(int64_t));
+
+  while (1) {
+    next_token(f, buf, bufsize);
+
+    if (strcmp(buf, "]") == 0) {
+      if (knows_dimsize[cur_dim]) {
+        if (reader->shape[cur_dim] != elems_read_in_dim[cur_dim]) {
+          ret = 1;
+          break;
+        }
+      } else {
+        knows_dimsize[cur_dim] = 1;
+        reader->shape[cur_dim] = elems_read_in_dim[cur_dim];
+      }
+      if (cur_dim == 0) {
+        ret = 0;
+        break;
+      } else {
+        cur_dim--;
+        elems_read_in_dim[cur_dim]++;
+      }
+    } else if (strcmp(buf, ",") == 0) {
+      next_token(f, buf, bufsize);
+      if (strcmp(buf, "[") == 0) {
+        if (cur_dim == dims - 1) {
+          ret = 1;
+          break;
+        }
+        first = 1;
+        cur_dim++;
+        elems_read_in_dim[cur_dim] = 0;
+      } else if (cur_dim == dims - 1) {
+        ret = read_str_elem(buf, reader);
+        if (ret != 0) {
+          break;
+        }
+        elems_read_in_dim[cur_dim]++;
+      } else {
+        ret = 1;
+        break;
+      }
+    } else if (strlen(buf) == 0) {
+      // EOF
+      ret = 1;
+      break;
+    } else if (first) {
+      if (strcmp(buf, "[") == 0) {
+        if (cur_dim == dims - 1) {
+          ret = 1;
+          break;
+        }
+        cur_dim++;
+        elems_read_in_dim[cur_dim] = 0;
+      } else {
+        ret = read_str_elem(buf, reader);
+        if (ret != 0) {
+          break;
+        }
+        elems_read_in_dim[cur_dim]++;
+        first = 0;
+      }
+    } else {
+      ret = 1;
+      break;
+    }
+  }
+
+  free(knows_dimsize);
+  free(elems_read_in_dim);
+  return ret;
+}
+
+static int read_str_empty_array(FILE *f, char *buf, int bufsize,
+                                const char *type_name, int64_t *shape, int64_t dims) {
+  if (strlen(buf) == 0) {
+    // EOF
+    return 1;
+  }
+
+  if (strcmp(buf, "empty") != 0) {
+    return 1;
+  }
+
+  if (!next_token_is(f, buf, bufsize, "(")) {
+    return 1;
+  }
+
+  for (int i = 0; i < dims; i++) {
+    if (!next_token_is(f, buf, bufsize, "[")) {
+      return 1;
+    }
+
+    next_token(f, buf, bufsize);
+
+    if (sscanf(buf, "%"SCNu64, (uint64_t*)&shape[i]) != 1) {
+      return 1;
+    }
+
+    if (!next_token_is(f, buf, bufsize, "]")) {
+      return 1;
+    }
+  }
+
+  if (!next_token_is(f, buf, bufsize, type_name)) {
+    return 1;
+  }
+
+
+  if (!next_token_is(f, buf, bufsize, ")")) {
+    return 1;
+  }
+
+  // Check whether the array really is empty.
+  for (int i = 0; i < dims; i++) {
+    if (shape[i] == 0) {
+      return 0;
+    }
+  }
+
+  // Not an empty array!
+  return 1;
+}
+
+static int read_str_array(FILE *f,
+                          int64_t elem_size, str_reader elem_reader,
+                          const char *type_name,
+                          void **data, int64_t *shape, int64_t dims) {
+  int ret;
+  struct array_reader reader;
+  char buf[100];
+
+  int dims_seen;
+  for (dims_seen = 0; dims_seen < dims; dims_seen++) {
+    if (!next_token_is(f, buf, sizeof(buf), "[")) {
+      break;
+    }
+  }
+
+  if (dims_seen == 0) {
+    return read_str_empty_array(f, buf, sizeof(buf), type_name, shape, dims);
+  }
+
+  if (dims_seen != dims) {
+    return 1;
+  }
+
+  reader.shape = shape;
+  reader.n_elems_used = 0;
+  reader.elem_size = elem_size;
+  reader.n_elems_space = 16;
+  reader.elems = (char*) realloc(*data, (size_t)(elem_size*reader.n_elems_space));
+  reader.elem_reader = elem_reader;
+
+  ret = read_str_array_elems(f, buf, sizeof(buf), &reader, dims);
+
+  *data = reader.elems;
+
+  return ret;
+}
+
+#define READ_STR(MACRO, PTR, SUFFIX)                                   \
+  remove_underscores(buf);                                              \
+  int j;                                                                \
+  if (sscanf(buf, "%"MACRO"%n", (PTR*)dest, &j) == 1) {                 \
+    return !(strcmp(buf+j, "") == 0 || strcmp(buf+j, SUFFIX) == 0);     \
+  } else {                                                              \
+    return 1;                                                           \
+  }
+
+static int read_str_i8(char *buf, void* dest) {
+  // Some platforms (WINDOWS) does not support scanf %hhd or its
+  // cousin, %SCNi8.  Read into int first to avoid corrupting
+  // memory.
+  //
+  // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63417
+  remove_underscores(buf);
+  int j, x;
+  if (sscanf(buf, "%i%n", &x, &j) == 1) {
+    *(int8_t*)dest = (int8_t)x;
+    return !(strcmp(buf+j, "") == 0 || strcmp(buf+j, "i8") == 0);
+  } else {
+    return 1;
+  }
+}
+
+static int read_str_u8(char *buf, void* dest) {
+  // Some platforms (WINDOWS) does not support scanf %hhd or its
+  // cousin, %SCNu8.  Read into int first to avoid corrupting
+  // memory.
+  //
+  // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63417
+  remove_underscores(buf);
+  int j, x;
+  if (sscanf(buf, "%i%n", &x, &j) == 1) {
+    *(uint8_t*)dest = (uint8_t)x;
+    return !(strcmp(buf+j, "") == 0 || strcmp(buf+j, "u8") == 0);
+  } else {
+    return 1;
+  }
+}
+
+static int read_str_i16(char *buf, void* dest) {
+  READ_STR(SCNi16, int16_t, "i16");
+}
+
+static int read_str_u16(char *buf, void* dest) {
+  READ_STR(SCNi16, int16_t, "u16");
+}
+
+static int read_str_i32(char *buf, void* dest) {
+  READ_STR(SCNi32, int32_t, "i32");
+}
+
+static int read_str_u32(char *buf, void* dest) {
+  READ_STR(SCNi32, int32_t, "u32");
+}
+
+static int read_str_i64(char *buf, void* dest) {
+  READ_STR(SCNi64, int64_t, "i64");
+}
+
+static int read_str_u64(char *buf, void* dest) {
+  // FIXME: This is not correct, as SCNu64 only permits decimal
+  // literals.  However, SCNi64 does not handle very large numbers
+  // correctly (it's really for signed numbers, so that's fair).
+  READ_STR(SCNu64, uint64_t, "u64");
+}
+
+static int read_str_f32(char *buf, void* dest) {
+  remove_underscores(buf);
+  if (strcmp(buf, "f32.nan") == 0) {
+    *(float*)dest = NAN;
+    return 0;
+  } else if (strcmp(buf, "f32.inf") == 0) {
+    *(float*)dest = INFINITY;
+    return 0;
+  } else if (strcmp(buf, "-f32.inf") == 0) {
+    *(float*)dest = -INFINITY;
+    return 0;
+  } else {
+    READ_STR("f", float, "f32");
+  }
+}
+
+static int read_str_f64(char *buf, void* dest) {
+  remove_underscores(buf);
+  if (strcmp(buf, "f64.nan") == 0) {
+    *(double*)dest = NAN;
+    return 0;
+  } else if (strcmp(buf, "f64.inf") == 0) {
+    *(double*)dest = INFINITY;
+    return 0;
+  } else if (strcmp(buf, "-f64.inf") == 0) {
+    *(double*)dest = -INFINITY;
+    return 0;
+  } else {
+    READ_STR("lf", double, "f64");
+  }
+}
+
+static int read_str_bool(char *buf, void* dest) {
+  if (strcmp(buf, "true") == 0) {
+    *(char*)dest = 1;
+    return 0;
+  } else if (strcmp(buf, "false") == 0) {
+    *(char*)dest = 0;
+    return 0;
+  } else {
+    return 1;
+  }
+}
+
+static int write_str_i8(FILE *out, int8_t *src) {
+  return fprintf(out, "%hhdi8", *src);
+}
+
+static int write_str_u8(FILE *out, uint8_t *src) {
+  return fprintf(out, "%hhuu8", *src);
+}
+
+static int write_str_i16(FILE *out, int16_t *src) {
+  return fprintf(out, "%hdi16", *src);
+}
+
+static int write_str_u16(FILE *out, uint16_t *src) {
+  return fprintf(out, "%huu16", *src);
+}
+
+static int write_str_i32(FILE *out, int32_t *src) {
+  return fprintf(out, "%di32", *src);
+}
+
+static int write_str_u32(FILE *out, uint32_t *src) {
+  return fprintf(out, "%uu32", *src);
+}
+
+static int write_str_i64(FILE *out, int64_t *src) {
+  return fprintf(out, "%"PRIi64"i64", *src);
+}
+
+static int write_str_u64(FILE *out, uint64_t *src) {
+  return fprintf(out, "%"PRIu64"u64", *src);
+}
+
+static int write_str_f32(FILE *out, float *src) {
+  float x = *src;
+  if (isnan(x)) {
+    return fprintf(out, "f32.nan");
+  } else if (isinf(x) && x >= 0) {
+    return fprintf(out, "f32.inf");
+  } else if (isinf(x)) {
+    return fprintf(out, "-f32.inf");
+  } else {
+    return fprintf(out, "%.6ff32", x);
+  }
+}
+
+static int write_str_f64(FILE *out, double *src) {
+  double x = *src;
+  if (isnan(x)) {
+    return fprintf(out, "f64.nan");
+  } else if (isinf(x) && x >= 0) {
+    return fprintf(out, "f64.inf");
+  } else if (isinf(x)) {
+    return fprintf(out, "-f64.inf");
+  } else {
+    return fprintf(out, "%.6ff64", *src);
+  }
+}
+
+static int write_str_bool(FILE *out, void *src) {
+  return fprintf(out, *(char*)src ? "true" : "false");
+}
+
+//// Binary I/O
+
+#define BINARY_FORMAT_VERSION 2
+#define IS_BIG_ENDIAN (!*(unsigned char *)&(uint16_t){1})
+
+static void flip_bytes(int elem_size, unsigned char *elem) {
+  for (int j=0; j<elem_size/2; j++) {
+    unsigned char head = elem[j];
+    int tail_index = elem_size-1-j;
+    elem[j] = elem[tail_index];
+    elem[tail_index] = head;
+  }
+}
+
+// On Windows we need to explicitly set the file mode to not mangle
+// newline characters.  On *nix there is no difference.
+#ifdef _WIN32
+#include <io.h>
+#include <fcntl.h>
+static void set_binary_mode(FILE *f) {
+  setmode(fileno(f), O_BINARY);
+}
+#else
+static void set_binary_mode(FILE *f) {
+  (void)f;
+}
+#endif
+
+static int read_byte(FILE *f, void* dest) {
+  int num_elems_read = fread(dest, 1, 1, f);
+  return num_elems_read == 1 ? 0 : 1;
+}
+
+//// Types
+
+struct primtype_info_t {
+  const char binname[4]; // Used for parsing binary data.
+  const char* type_name; // Same name as in Futhark.
+  const int64_t size; // in bytes
+  const writer write_str; // Write in text format.
+  const str_reader read_str; // Read in text format.
+};
+
+static const struct primtype_info_t i8_info =
+  {.binname = "  i8", .type_name = "i8",   .size = 1,
+   .write_str = (writer)write_str_i8, .read_str = (str_reader)read_str_i8};
+static const struct primtype_info_t i16_info =
+  {.binname = " i16", .type_name = "i16",  .size = 2,
+   .write_str = (writer)write_str_i16, .read_str = (str_reader)read_str_i16};
+static const struct primtype_info_t i32_info =
+  {.binname = " i32", .type_name = "i32",  .size = 4,
+   .write_str = (writer)write_str_i32, .read_str = (str_reader)read_str_i32};
+static const struct primtype_info_t i64_info =
+  {.binname = " i64", .type_name = "i64",  .size = 8,
+   .write_str = (writer)write_str_i64, .read_str = (str_reader)read_str_i64};
+static const struct primtype_info_t u8_info =
+  {.binname = "  u8", .type_name = "u8",   .size = 1,
+   .write_str = (writer)write_str_u8, .read_str = (str_reader)read_str_u8};
+static const struct primtype_info_t u16_info =
+  {.binname = " u16", .type_name = "u16",  .size = 2,
+   .write_str = (writer)write_str_u16, .read_str = (str_reader)read_str_u16};
+static const struct primtype_info_t u32_info =
+  {.binname = " u32", .type_name = "u32",  .size = 4,
+   .write_str = (writer)write_str_u32, .read_str = (str_reader)read_str_u32};
+static const struct primtype_info_t u64_info =
+  {.binname = " u64", .type_name = "u64",  .size = 8,
+   .write_str = (writer)write_str_u64, .read_str = (str_reader)read_str_u64};
+static const struct primtype_info_t f32_info =
+  {.binname = " f32", .type_name = "f32",  .size = 4,
+   .write_str = (writer)write_str_f32, .read_str = (str_reader)read_str_f32};
+static const struct primtype_info_t f64_info =
+  {.binname = " f64", .type_name = "f64",  .size = 8,
+   .write_str = (writer)write_str_f64, .read_str = (str_reader)read_str_f64};
+static const struct primtype_info_t bool_info =
+  {.binname = "bool", .type_name = "bool", .size = 1,
+   .write_str = (writer)write_str_bool, .read_str = (str_reader)read_str_bool};
+
+static const struct primtype_info_t* primtypes[] = {
+  &i8_info, &i16_info, &i32_info, &i64_info,
+  &u8_info, &u16_info, &u32_info, &u64_info,
+  &f32_info, &f64_info,
+  &bool_info,
+  NULL // NULL-terminated
+};
+
+// General value interface.  All endian business taken care of at
+// lower layers.
+
+static int read_is_binary(FILE *f) {
+  skipspaces(f);
+  int c = getc(f);
+  if (c == 'b') {
+    int8_t bin_version;
+    int ret = read_byte(f, &bin_version);
+
+    if (ret != 0) { futhark_panic(1, "binary-input: could not read version.\n"); }
+
+    if (bin_version != BINARY_FORMAT_VERSION) {
+      futhark_panic(1, "binary-input: File uses version %i, but I only understand version %i.\n",
+            bin_version, BINARY_FORMAT_VERSION);
+    }
+
+    return 1;
+  }
+  ungetc(c, f);
+  return 0;
+}
+
+static const struct primtype_info_t* read_bin_read_type_enum(FILE *f) {
+  char read_binname[4];
+
+  int num_matched = fscanf(f, "%4c", read_binname);
+  if (num_matched != 1) { futhark_panic(1, "binary-input: Couldn't read element type.\n"); }
+
+  const struct primtype_info_t **type = primtypes;
+
+  for (; *type != NULL; type++) {
+    // I compare the 4 characters manually instead of using strncmp because
+    // this allows any value to be used, also NULL bytes
+    if (memcmp(read_binname, (*type)->binname, 4) == 0) {
+      return *type;
+    }
+  }
+  futhark_panic(1, "binary-input: Did not recognize the type '%s'.\n", read_binname);
+  return NULL;
+}
+
+static void read_bin_ensure_scalar(FILE *f, const struct primtype_info_t *expected_type) {
+  int8_t bin_dims;
+  int ret = read_byte(f, &bin_dims);
+  if (ret != 0) { futhark_panic(1, "binary-input: Couldn't get dims.\n"); }
+
+  if (bin_dims != 0) {
+    futhark_panic(1, "binary-input: Expected scalar (0 dimensions), but got array with %i dimensions.\n",
+          bin_dims);
+  }
+
+  const struct primtype_info_t *bin_type = read_bin_read_type_enum(f);
+  if (bin_type != expected_type) {
+    futhark_panic(1, "binary-input: Expected scalar of type %s but got scalar of type %s.\n",
+          expected_type->type_name,
+          bin_type->type_name);
+  }
+}
+
+//// High-level interface
+
+static int read_bin_array(FILE *f,
+                          const struct primtype_info_t *expected_type, void **data, int64_t *shape, int64_t dims) {
+  int ret;
+
+  int8_t bin_dims;
+  ret = read_byte(f, &bin_dims);
+  if (ret != 0) { futhark_panic(1, "binary-input: Couldn't get dims.\n"); }
+
+  if (bin_dims != dims) {
+    futhark_panic(1, "binary-input: Expected %i dimensions, but got array with %i dimensions.\n",
+          dims, bin_dims);
+  }
+
+  const struct primtype_info_t *bin_primtype = read_bin_read_type_enum(f);
+  if (expected_type != bin_primtype) {
+    futhark_panic(1, "binary-input: Expected %iD-array with element type '%s' but got %iD-array with element type '%s'.\n",
+          dims, expected_type->type_name, dims, bin_primtype->type_name);
+  }
+
+  int64_t elem_count = 1;
+  for (int i=0; i<dims; i++) {
+    int64_t bin_shape;
+    ret = fread(&bin_shape, sizeof(bin_shape), 1, f);
+    if (ret != 1) {
+      futhark_panic(1, "binary-input: Couldn't read size for dimension %i of array.\n", i);
+    }
+    if (IS_BIG_ENDIAN) {
+      flip_bytes(sizeof(bin_shape), (unsigned char*) &bin_shape);
+    }
+    elem_count *= bin_shape;
+    shape[i] = bin_shape;
+  }
+
+  int64_t elem_size = expected_type->size;
+  void* tmp = realloc(*data, (size_t)(elem_count * elem_size));
+  if (tmp == NULL) {
+    futhark_panic(1, "binary-input: Failed to allocate array of size %i.\n",
+          elem_count * elem_size);
+  }
+  *data = tmp;
+
+  int64_t num_elems_read = (int64_t)fread(*data, (size_t)elem_size, (size_t)elem_count, f);
+  if (num_elems_read != elem_count) {
+    futhark_panic(1, "binary-input: tried to read %i elements of an array, but only got %i elements.\n",
+          elem_count, num_elems_read);
+  }
+
+  // If we're on big endian platform we must change all multibyte elements
+  // from using little endian to big endian
+  if (IS_BIG_ENDIAN && elem_size != 1) {
+    flip_bytes(elem_size, (unsigned char*) *data);
+  }
+
+  return 0;
+}
+
+static int read_array(FILE *f, const struct primtype_info_t *expected_type, void **data, int64_t *shape, int64_t dims) {
+  if (!read_is_binary(f)) {
+    return read_str_array(f, expected_type->size, (str_reader)expected_type->read_str, expected_type->type_name, data, shape, dims);
+  } else {
+    return read_bin_array(f, expected_type, data, shape, dims);
+  }
+}
+
+static int end_of_input(FILE *f) {
+  skipspaces(f);
+  char token[2];
+  next_token(f, token, sizeof(token));
+  if (strcmp(token, "") == 0) {
+    return 0;
+  } else {
+    return 1;
+  }
+}
+
+static int write_str_array(FILE *out,
+                           const struct primtype_info_t *elem_type,
+                           const unsigned char *data,
+                           const int64_t *shape,
+                           int8_t rank) {
+  if (rank==0) {
+    elem_type->write_str(out, (void*)data);
+  } else {
+    int64_t len = (int64_t)shape[0];
+    int64_t slice_size = 1;
+
+    int64_t elem_size = elem_type->size;
+    for (int8_t i = 1; i < rank; i++) {
+      slice_size *= shape[i];
+    }
+
+    if (len*slice_size == 0) {
+      fprintf(out, "empty(");
+      for (int64_t i = 0; i < rank; i++) {
+        fprintf(out, "[%"PRIi64"]", shape[i]);
+      }
+      fprintf(out, "%s", elem_type->type_name);
+      fprintf(out, ")");
+    } else if (rank==1) {
+      fputc('[', out);
+      for (int64_t i = 0; i < len; i++) {
+        elem_type->write_str(out, (void*) (data + i * elem_size));
+        if (i != len-1) {
+          fprintf(out, ", ");
+        }
+      }
+      fputc(']', out);
+    } else {
+      fputc('[', out);
+      for (int64_t i = 0; i < len; i++) {
+        write_str_array(out, elem_type, data + i * slice_size * elem_size, shape+1, rank-1);
+        if (i != len-1) {
+          fprintf(out, ", ");
+        }
+      }
+      fputc(']', out);
+    }
+  }
+  return 0;
+}
+
+static int write_bin_array(FILE *out,
+                           const struct primtype_info_t *elem_type,
+                           const unsigned char *data,
+                           const int64_t *shape,
+                           int8_t rank) {
+  int64_t num_elems = 1;
+  for (int64_t i = 0; i < rank; i++) {
+    num_elems *= shape[i];
+  }
+
+  fputc('b', out);
+  fputc((char)BINARY_FORMAT_VERSION, out);
+  fwrite(&rank, sizeof(int8_t), 1, out);
+  fwrite(elem_type->binname, 4, 1, out);
+  if (shape != NULL) {
+    fwrite(shape, sizeof(int64_t), (size_t)rank, out);
+  }
+
+  if (IS_BIG_ENDIAN) {
+    for (int64_t i = 0; i < num_elems; i++) {
+      const unsigned char *elem = data+i*elem_type->size;
+      for (int64_t j = 0; j < elem_type->size; j++) {
+        fwrite(&elem[elem_type->size-j], 1, 1, out);
+      }
+    }
+  } else {
+    fwrite(data, (size_t)elem_type->size, (size_t)num_elems, out);
+  }
+
+  return 0;
+}
+
+static int write_array(FILE *out, int write_binary,
+                       const struct primtype_info_t *elem_type,
+                       const void *data,
+                       const int64_t *shape,
+                       const int8_t rank) {
+  if (write_binary) {
+    return write_bin_array(out, elem_type, data, shape, rank);
+  } else {
+    return write_str_array(out, elem_type, data, shape, rank);
+  }
+}
+
+static int read_scalar(FILE *f,
+                       const struct primtype_info_t *expected_type, void *dest) {
+  if (!read_is_binary(f)) {
+    char buf[100];
+    next_token(f, buf, sizeof(buf));
+    return expected_type->read_str(buf, dest);
+  } else {
+    read_bin_ensure_scalar(f, expected_type);
+    int64_t elem_size = expected_type->size;
+    int num_elems_read = fread(dest, (size_t)elem_size, 1, f);
+    if (IS_BIG_ENDIAN) {
+      flip_bytes(elem_size, (unsigned char*) dest);
+    }
+    return num_elems_read == 1 ? 0 : 1;
+  }
+}
+
+static int write_scalar(FILE *out, int write_binary, const struct primtype_info_t *type, void *src) {
+  if (write_binary) {
+    return write_bin_array(out, type, src, NULL, 0);
+  } else {
+    return type->write_str(out, src);
+  }
+}
+
+// End of values.h.
+
+static int binary_output = 0;
+static FILE *runtime_file;
+static int perform_warmup = 0;
+static int num_runs = 1;
+static const char *entry_point = "main";
+// Start of tuning.h.
+
+static char* load_tuning_file(const char *fname,
+                              void *cfg,
+                              int (*set_size)(void*, const char*, size_t)) {
+  const int max_line_len = 1024;
+  char* line = (char*) malloc(max_line_len);
+
+  FILE *f = fopen(fname, "r");
+
+  if (f == NULL) {
+    snprintf(line, max_line_len, "Cannot open file: %s", strerror(errno));
+    return line;
+  }
+
+  int lineno = 0;
+  while (fgets(line, max_line_len, f) != NULL) {
+    lineno++;
+    char *eql = strstr(line, "=");
+    if (eql) {
+      *eql = 0;
+      int value = atoi(eql+1);
+      if (set_size(cfg, line, value) != 0) {
+        strncpy(eql+1, line, max_line_len-strlen(line)-1);
+        snprintf(line, max_line_len, "Unknown name '%s' on line %d.", eql+1, lineno);
+        return line;
+      }
+    } else {
+      snprintf(line, max_line_len, "Invalid line %d (must be of form 'name=int').",
+               lineno);
+      return line;
+    }
+  }
+
+  free(line);
+
+  return NULL;
+}
+
+// End of tuning.h.
+
+int parse_options(struct futhark_context_config *cfg, int argc,
+                  char *const argv[])
+{
+    int ch;
+    static struct option long_options[] = {{"write-runtime-to",
+                                            required_argument, NULL, 1},
+                                           {"runs", required_argument, NULL, 2},
+                                           {"debugging", no_argument, NULL, 3},
+                                           {"log", no_argument, NULL, 4},
+                                           {"entry-point", required_argument,
+                                            NULL, 5}, {"binary-output",
+                                                       no_argument, NULL, 6},
+                                           {"help", no_argument, NULL, 7}, {0,
+                                                                            0,
+                                                                            0,
+                                                                            0}};
+    static char *option_descriptions =
+                "  -t/--write-runtime-to FILE Print the time taken to execute the program to the indicated file, an integral number of microseconds.\n  -r/--runs INT              Perform NUM runs of the program.\n  -D/--debugging             Perform possibly expensive internal correctness checks and verbose logging.\n  -L/--log                   Print various low-overhead logging information to stderr while running.\n  -e/--entry-point NAME      The entry point to run. Defaults to main.\n  -b/--binary-output         Print the program result in the binary output format.\n  -h/--help                  Print help information and exit.\n";
+    
+    while ((ch = getopt_long(argc, argv, ":t:r:DLe:bh", long_options, NULL)) !=
+           -1) {
+        if (ch == 1 || ch == 't') {
+            runtime_file = fopen(optarg, "w");
+            if (runtime_file == NULL)
+                futhark_panic(1, "Cannot open %s: %s\n", optarg,
+                              strerror(errno));
+        }
+        if (ch == 2 || ch == 'r') {
+            num_runs = atoi(optarg);
+            perform_warmup = 1;
+            if (num_runs <= 0)
+                futhark_panic(1, "Need a positive number of runs, not %s\n",
+                              optarg);
+        }
+        if (ch == 3 || ch == 'D')
+            futhark_context_config_set_debugging(cfg, 1);
+        if (ch == 4 || ch == 'L')
+            futhark_context_config_set_logging(cfg, 1);
+        if (ch == 5 || ch == 'e') {
+            if (entry_point != NULL)
+                entry_point = optarg;
+        }
+        if (ch == 6 || ch == 'b')
+            binary_output = 1;
+        if (ch == 7 || ch == 'h') {
+            printf("Usage: %s [OPTION]...\nOptions:\n\n%s\nFor more information, consult the Futhark User's Guide or the man pages.\n",
+                   fut_progname, option_descriptions);
+            exit(0);
+        }
+        if (ch == ':')
+            futhark_panic(-1, "Missing argument for option %s\n", argv[optind -
+                                                                       1]);
+        if (ch == '?') {
+            fprintf(stderr, "Usage: %s: %s\n", fut_progname,
+                    "  -t/--write-runtime-to FILE Print the time taken to execute the program to the indicated file, an integral number of microseconds.\n  -r/--runs INT              Perform NUM runs of the program.\n  -D/--debugging             Perform possibly expensive internal correctness checks and verbose logging.\n  -L/--log                   Print various low-overhead logging information to stderr while running.\n  -e/--entry-point NAME      The entry point to run. Defaults to main.\n  -b/--binary-output         Print the program result in the binary output format.\n  -h/--help                  Print help information and exit.\n");
+            futhark_panic(1, "Unknown option: %s\n", argv[optind - 1]);
+        }
+    }
+    return optind;
+}
+typedef void entry_point_fun(struct futhark_context *);
+struct entry_point_entry {
+    const char *name;
+    entry_point_fun *fun;
+} ;
+int main(int argc, char **argv)
+{
+    fut_progname = argv[0];
+    
+    struct entry_point_entry entry_points[] = {};
+    struct futhark_context_config *cfg = futhark_context_config_new();
+    
+    assert(cfg != NULL);
+    
+    int parsed_options = parse_options(cfg, argc, argv);
+    
+    argc -= parsed_options;
+    argv += parsed_options;
+    if (argc != 0)
+        futhark_panic(1, "Excess non-option: %s\n", argv[0]);
+    
+    struct futhark_context *ctx = futhark_context_new(cfg);
+    
+    assert(ctx != NULL);
+    
+    char *error = futhark_context_get_error(ctx);
+    
+    if (error != NULL)
+        futhark_panic(1, "%s", error);
+    if (entry_point != NULL) {
+        int num_entry_points = sizeof(entry_points) / sizeof(entry_points[0]);
+        entry_point_fun *entry_point_fun = NULL;
+        
+        for (int i = 0; i < num_entry_points; i++) {
+            if (strcmp(entry_points[i].name, entry_point) == 0) {
+                entry_point_fun = entry_points[i].fun;
+                break;
+            }
+        }
+        if (entry_point_fun == NULL) {
+            fprintf(stderr,
+                    "No entry point '%s'.  Select another with --entry-point.  Options are:\n",
+                    entry_point);
+            for (int i = 0; i < num_entry_points; i++)
+                fprintf(stderr, "%s\n", entry_points[i].name);
+            return 1;
+        }
+        entry_point_fun(ctx);
+        if (runtime_file != NULL)
+            fclose(runtime_file);
+        
+        char *report = futhark_context_report(ctx);
+        
+        fputs(report, stderr);
+        free(report);
+    }
+    futhark_context_free(ctx);
+    futhark_context_config_free(cfg);
+    return 0;
+}
+#ifdef _MSC_VER
+#define inline __inline
+#endif
+#include <string.h>
+#include <inttypes.h>
+#include <ctype.h>
+#include <errno.h>
+#include <assert.h>
+
+// Start of lock.h.
+
+// A very simple cross-platform implementation of locks.  Uses
+// pthreads on Unix and some Windows thing there.  Futhark's
+// host-level code is not multithreaded, but user code may be, so we
+// need some mechanism for ensuring atomic access to API functions.
+// This is that mechanism.  It is not exposed to user code at all, so
+// we do not have to worry about name collisions.
+
+#ifdef _WIN32
+
+typedef HANDLE lock_t;
+
+static void create_lock(lock_t *lock) {
+  *lock = CreateMutex(NULL,  // Default security attributes.
+                      FALSE, // Initially unlocked.
+                      NULL); // Unnamed.
+}
+
+static void lock_lock(lock_t *lock) {
+  assert(WaitForSingleObject(*lock, INFINITE) == WAIT_OBJECT_0);
+}
+
+static void lock_unlock(lock_t *lock) {
+  assert(ReleaseMutex(*lock));
+}
+
+static void free_lock(lock_t *lock) {
+  CloseHandle(*lock);
+}
+
+#else
+// Assuming POSIX
+
+#include <pthread.h>
+
+typedef pthread_mutex_t lock_t;
+
+static void create_lock(lock_t *lock) {
+  int r = pthread_mutex_init(lock, NULL);
+  assert(r == 0);
+}
+
+static void lock_lock(lock_t *lock) {
+  int r = pthread_mutex_lock(lock);
+  assert(r == 0);
+}
+
+static void lock_unlock(lock_t *lock) {
+  int r = pthread_mutex_unlock(lock);
+  assert(r == 0);
+}
+
+static void free_lock(lock_t *lock) {
+  // Nothing to do for pthreads.
+  (void)lock;
+}
+
+#endif
+
+// End of lock.h.
+
+static inline uint8_t add8(uint8_t x, uint8_t y)
+{
+    return x + y;
+}
+static inline uint16_t add16(uint16_t x, uint16_t y)
+{
+    return x + y;
+}
+static inline uint32_t add32(uint32_t x, uint32_t y)
+{
+    return x + y;
+}
+static inline uint64_t add64(uint64_t x, uint64_t y)
+{
+    return x + y;
+}
+static inline uint8_t sub8(uint8_t x, uint8_t y)
+{
+    return x - y;
+}
+static inline uint16_t sub16(uint16_t x, uint16_t y)
+{
+    return x - y;
+}
+static inline uint32_t sub32(uint32_t x, uint32_t y)
+{
+    return x - y;
+}
+static inline uint64_t sub64(uint64_t x, uint64_t y)
+{
+    return x - y;
+}
+static inline uint8_t mul8(uint8_t x, uint8_t y)
+{
+    return x * y;
+}
+static inline uint16_t mul16(uint16_t x, uint16_t y)
+{
+    return x * y;
+}
+static inline uint32_t mul32(uint32_t x, uint32_t y)
+{
+    return x * y;
+}
+static inline uint64_t mul64(uint64_t x, uint64_t y)
+{
+    return x * y;
+}
+static inline uint8_t udiv8(uint8_t x, uint8_t y)
+{
+    return x / y;
+}
+static inline uint16_t udiv16(uint16_t x, uint16_t y)
+{
+    return x / y;
+}
+static inline uint32_t udiv32(uint32_t x, uint32_t y)
+{
+    return x / y;
+}
+static inline uint64_t udiv64(uint64_t x, uint64_t y)
+{
+    return x / y;
+}
+static inline uint8_t udiv_up8(uint8_t x, uint8_t y)
+{
+    return (x + y - 1) / y;
+}
+static inline uint16_t udiv_up16(uint16_t x, uint16_t y)
+{
+    return (x + y - 1) / y;
+}
+static inline uint32_t udiv_up32(uint32_t x, uint32_t y)
+{
+    return (x + y - 1) / y;
+}
+static inline uint64_t udiv_up64(uint64_t x, uint64_t y)
+{
+    return (x + y - 1) / y;
+}
+static inline uint8_t umod8(uint8_t x, uint8_t y)
+{
+    return x % y;
+}
+static inline uint16_t umod16(uint16_t x, uint16_t y)
+{
+    return x % y;
+}
+static inline uint32_t umod32(uint32_t x, uint32_t y)
+{
+    return x % y;
+}
+static inline uint64_t umod64(uint64_t x, uint64_t y)
+{
+    return x % y;
+}
+static inline uint8_t udiv_safe8(uint8_t x, uint8_t y)
+{
+    return y == 0 ? 0 : x / y;
+}
+static inline uint16_t udiv_safe16(uint16_t x, uint16_t y)
+{
+    return y == 0 ? 0 : x / y;
+}
+static inline uint32_t udiv_safe32(uint32_t x, uint32_t y)
+{
+    return y == 0 ? 0 : x / y;
+}
+static inline uint64_t udiv_safe64(uint64_t x, uint64_t y)
+{
+    return y == 0 ? 0 : x / y;
+}
+static inline uint8_t udiv_up_safe8(uint8_t x, uint8_t y)
+{
+    return y == 0 ? 0 : (x + y - 1) / y;
+}
+static inline uint16_t udiv_up_safe16(uint16_t x, uint16_t y)
+{
+    return y == 0 ? 0 : (x + y - 1) / y;
+}
+static inline uint32_t udiv_up_safe32(uint32_t x, uint32_t y)
+{
+    return y == 0 ? 0 : (x + y - 1) / y;
+}
+static inline uint64_t udiv_up_safe64(uint64_t x, uint64_t y)
+{
+    return y == 0 ? 0 : (x + y - 1) / y;
+}
+static inline uint8_t umod_safe8(uint8_t x, uint8_t y)
+{
+    return y == 0 ? 0 : x % y;
+}
+static inline uint16_t umod_safe16(uint16_t x, uint16_t y)
+{
+    return y == 0 ? 0 : x % y;
+}
+static inline uint32_t umod_safe32(uint32_t x, uint32_t y)
+{
+    return y == 0 ? 0 : x % y;
+}
+static inline uint64_t umod_safe64(uint64_t x, uint64_t y)
+{
+    return y == 0 ? 0 : x % y;
+}
+static inline int8_t sdiv8(int8_t x, int8_t y)
+{
+    int8_t q = x / y;
+    int8_t r = x % y;
+    
+    return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0);
+}
+static inline int16_t sdiv16(int16_t x, int16_t y)
+{
+    int16_t q = x / y;
+    int16_t r = x % y;
+    
+    return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0);
+}
+static inline int32_t sdiv32(int32_t x, int32_t y)
+{
+    int32_t q = x / y;
+    int32_t r = x % y;
+    
+    return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0);
+}
+static inline int64_t sdiv64(int64_t x, int64_t y)
+{
+    int64_t q = x / y;
+    int64_t r = x % y;
+    
+    return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0);
+}
+static inline int8_t sdiv_up8(int8_t x, int8_t y)
+{
+    return sdiv8(x + y - 1, y);
+}
+static inline int16_t sdiv_up16(int16_t x, int16_t y)
+{
+    return sdiv16(x + y - 1, y);
+}
+static inline int32_t sdiv_up32(int32_t x, int32_t y)
+{
+    return sdiv32(x + y - 1, y);
+}
+static inline int64_t sdiv_up64(int64_t x, int64_t y)
+{
+    return sdiv64(x + y - 1, y);
+}
+static inline int8_t smod8(int8_t x, int8_t y)
+{
+    int8_t r = x % y;
+    
+    return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y);
+}
+static inline int16_t smod16(int16_t x, int16_t y)
+{
+    int16_t r = x % y;
+    
+    return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y);
+}
+static inline int32_t smod32(int32_t x, int32_t y)
+{
+    int32_t r = x % y;
+    
+    return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y);
+}
+static inline int64_t smod64(int64_t x, int64_t y)
+{
+    int64_t r = x % y;
+    
+    return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y);
+}
+static inline int8_t sdiv_safe8(int8_t x, int8_t y)
+{
+    return y == 0 ? 0 : sdiv8(x, y);
+}
+static inline int16_t sdiv_safe16(int16_t x, int16_t y)
+{
+    return y == 0 ? 0 : sdiv16(x, y);
+}
+static inline int32_t sdiv_safe32(int32_t x, int32_t y)
+{
+    return y == 0 ? 0 : sdiv32(x, y);
+}
+static inline int64_t sdiv_safe64(int64_t x, int64_t y)
+{
+    return y == 0 ? 0 : sdiv64(x, y);
+}
+static inline int8_t sdiv_up_safe8(int8_t x, int8_t y)
+{
+    return sdiv_safe8(x + y - 1, y);
+}
+static inline int16_t sdiv_up_safe16(int16_t x, int16_t y)
+{
+    return sdiv_safe16(x + y - 1, y);
+}
+static inline int32_t sdiv_up_safe32(int32_t x, int32_t y)
+{
+    return sdiv_safe32(x + y - 1, y);
+}
+static inline int64_t sdiv_up_safe64(int64_t x, int64_t y)
+{
+    return sdiv_safe64(x + y - 1, y);
+}
+static inline int8_t smod_safe8(int8_t x, int8_t y)
+{
+    return y == 0 ? 0 : smod8(x, y);
+}
+static inline int16_t smod_safe16(int16_t x, int16_t y)
+{
+    return y == 0 ? 0 : smod16(x, y);
+}
+static inline int32_t smod_safe32(int32_t x, int32_t y)
+{
+    return y == 0 ? 0 : smod32(x, y);
+}
+static inline int64_t smod_safe64(int64_t x, int64_t y)
+{
+    return y == 0 ? 0 : smod64(x, y);
+}
+static inline int8_t squot8(int8_t x, int8_t y)
+{
+    return x / y;
+}
+static inline int16_t squot16(int16_t x, int16_t y)
+{
+    return x / y;
+}
+static inline int32_t squot32(int32_t x, int32_t y)
+{
+    return x / y;
+}
+static inline int64_t squot64(int64_t x, int64_t y)
+{
+    return x / y;
+}
+static inline int8_t srem8(int8_t x, int8_t y)
+{
+    return x % y;
+}
+static inline int16_t srem16(int16_t x, int16_t y)
+{
+    return x % y;
+}
+static inline int32_t srem32(int32_t x, int32_t y)
+{
+    return x % y;
+}
+static inline int64_t srem64(int64_t x, int64_t y)
+{
+    return x % y;
+}
+static inline int8_t squot_safe8(int8_t x, int8_t y)
+{
+    return y == 0 ? 0 : x / y;
+}
+static inline int16_t squot_safe16(int16_t x, int16_t y)
+{
+    return y == 0 ? 0 : x / y;
+}
+static inline int32_t squot_safe32(int32_t x, int32_t y)
+{
+    return y == 0 ? 0 : x / y;
+}
+static inline int64_t squot_safe64(int64_t x, int64_t y)
+{
+    return y == 0 ? 0 : x / y;
+}
+static inline int8_t srem_safe8(int8_t x, int8_t y)
+{
+    return y == 0 ? 0 : x % y;
+}
+static inline int16_t srem_safe16(int16_t x, int16_t y)
+{
+    return y == 0 ? 0 : x % y;
+}
+static inline int32_t srem_safe32(int32_t x, int32_t y)
+{
+    return y == 0 ? 0 : x % y;
+}
+static inline int64_t srem_safe64(int64_t x, int64_t y)
+{
+    return y == 0 ? 0 : x % y;
+}
+static inline int8_t smin8(int8_t x, int8_t y)
+{
+    return x < y ? x : y;
+}
+static inline int16_t smin16(int16_t x, int16_t y)
+{
+    return x < y ? x : y;
+}
+static inline int32_t smin32(int32_t x, int32_t y)
+{
+    return x < y ? x : y;
+}
+static inline int64_t smin64(int64_t x, int64_t y)
+{
+    return x < y ? x : y;
+}
+static inline uint8_t umin8(uint8_t x, uint8_t y)
+{
+    return x < y ? x : y;
+}
+static inline uint16_t umin16(uint16_t x, uint16_t y)
+{
+    return x < y ? x : y;
+}
+static inline uint32_t umin32(uint32_t x, uint32_t y)
+{
+    return x < y ? x : y;
+}
+static inline uint64_t umin64(uint64_t x, uint64_t y)
+{
+    return x < y ? x : y;
+}
+static inline int8_t smax8(int8_t x, int8_t y)
+{
+    return x < y ? y : x;
+}
+static inline int16_t smax16(int16_t x, int16_t y)
+{
+    return x < y ? y : x;
+}
+static inline int32_t smax32(int32_t x, int32_t y)
+{
+    return x < y ? y : x;
+}
+static inline int64_t smax64(int64_t x, int64_t y)
+{
+    return x < y ? y : x;
+}
+static inline uint8_t umax8(uint8_t x, uint8_t y)
+{
+    return x < y ? y : x;
+}
+static inline uint16_t umax16(uint16_t x, uint16_t y)
+{
+    return x < y ? y : x;
+}
+static inline uint32_t umax32(uint32_t x, uint32_t y)
+{
+    return x < y ? y : x;
+}
+static inline uint64_t umax64(uint64_t x, uint64_t y)
+{
+    return x < y ? y : x;
+}
+static inline uint8_t shl8(uint8_t x, uint8_t y)
+{
+    return x << y;
+}
+static inline uint16_t shl16(uint16_t x, uint16_t y)
+{
+    return x << y;
+}
+static inline uint32_t shl32(uint32_t x, uint32_t y)
+{
+    return x << y;
+}
+static inline uint64_t shl64(uint64_t x, uint64_t y)
+{
+    return x << y;
+}
+static inline uint8_t lshr8(uint8_t x, uint8_t y)
+{
+    return x >> y;
+}
+static inline uint16_t lshr16(uint16_t x, uint16_t y)
+{
+    return x >> y;
+}
+static inline uint32_t lshr32(uint32_t x, uint32_t y)
+{
+    return x >> y;
+}
+static inline uint64_t lshr64(uint64_t x, uint64_t y)
+{
+    return x >> y;
+}
+static inline int8_t ashr8(int8_t x, int8_t y)
+{
+    return x >> y;
+}
+static inline int16_t ashr16(int16_t x, int16_t y)
+{
+    return x >> y;
+}
+static inline int32_t ashr32(int32_t x, int32_t y)
+{
+    return x >> y;
+}
+static inline int64_t ashr64(int64_t x, int64_t y)
+{
+    return x >> y;
+}
+static inline uint8_t and8(uint8_t x, uint8_t y)
+{
+    return x & y;
+}
+static inline uint16_t and16(uint16_t x, uint16_t y)
+{
+    return x & y;
+}
+static inline uint32_t and32(uint32_t x, uint32_t y)
+{
+    return x & y;
+}
+static inline uint64_t and64(uint64_t x, uint64_t y)
+{
+    return x & y;
+}
+static inline uint8_t or8(uint8_t x, uint8_t y)
+{
+    return x | y;
+}
+static inline uint16_t or16(uint16_t x, uint16_t y)
+{
+    return x | y;
+}
+static inline uint32_t or32(uint32_t x, uint32_t y)
+{
+    return x | y;
+}
+static inline uint64_t or64(uint64_t x, uint64_t y)
+{
+    return x | y;
+}
+static inline uint8_t xor8(uint8_t x, uint8_t y)
+{
+    return x ^ y;
+}
+static inline uint16_t xor16(uint16_t x, uint16_t y)
+{
+    return x ^ y;
+}
+static inline uint32_t xor32(uint32_t x, uint32_t y)
+{
+    return x ^ y;
+}
+static inline uint64_t xor64(uint64_t x, uint64_t y)
+{
+    return x ^ y;
+}
+static inline bool ult8(uint8_t x, uint8_t y)
+{
+    return x < y;
+}
+static inline bool ult16(uint16_t x, uint16_t y)
+{
+    return x < y;
+}
+static inline bool ult32(uint32_t x, uint32_t y)
+{
+    return x < y;
+}
+static inline bool ult64(uint64_t x, uint64_t y)
+{
+    return x < y;
+}
+static inline bool ule8(uint8_t x, uint8_t y)
+{
+    return x <= y;
+}
+static inline bool ule16(uint16_t x, uint16_t y)
+{
+    return x <= y;
+}
+static inline bool ule32(uint32_t x, uint32_t y)
+{
+    return x <= y;
+}
+static inline bool ule64(uint64_t x, uint64_t y)
+{
+    return x <= y;
+}
+static inline bool slt8(int8_t x, int8_t y)
+{
+    return x < y;
+}
+static inline bool slt16(int16_t x, int16_t y)
+{
+    return x < y;
+}
+static inline bool slt32(int32_t x, int32_t y)
+{
+    return x < y;
+}
+static inline bool slt64(int64_t x, int64_t y)
+{
+    return x < y;
+}
+static inline bool sle8(int8_t x, int8_t y)
+{
+    return x <= y;
+}
+static inline bool sle16(int16_t x, int16_t y)
+{
+    return x <= y;
+}
+static inline bool sle32(int32_t x, int32_t y)
+{
+    return x <= y;
+}
+static inline bool sle64(int64_t x, int64_t y)
+{
+    return x <= y;
+}
+static inline int8_t pow8(int8_t x, int8_t y)
+{
+    int8_t res = 1, rem = y;
+    
+    while (rem != 0) {
+        if (rem & 1)
+            res *= x;
+        rem >>= 1;
+        x *= x;
+    }
+    return res;
+}
+static inline int16_t pow16(int16_t x, int16_t y)
+{
+    int16_t res = 1, rem = y;
+    
+    while (rem != 0) {
+        if (rem & 1)
+            res *= x;
+        rem >>= 1;
+        x *= x;
+    }
+    return res;
+}
+static inline int32_t pow32(int32_t x, int32_t y)
+{
+    int32_t res = 1, rem = y;
+    
+    while (rem != 0) {
+        if (rem & 1)
+            res *= x;
+        rem >>= 1;
+        x *= x;
+    }
+    return res;
+}
+static inline int64_t pow64(int64_t x, int64_t y)
+{
+    int64_t res = 1, rem = y;
+    
+    while (rem != 0) {
+        if (rem & 1)
+            res *= x;
+        rem >>= 1;
+        x *= x;
+    }
+    return res;
+}
+static inline bool itob_i8_bool(int8_t x)
+{
+    return x;
+}
+static inline bool itob_i16_bool(int16_t x)
+{
+    return x;
+}
+static inline bool itob_i32_bool(int32_t x)
+{
+    return x;
+}
+static inline bool itob_i64_bool(int64_t x)
+{
+    return x;
+}
+static inline int8_t btoi_bool_i8(bool x)
+{
+    return x;
+}
+static inline int16_t btoi_bool_i16(bool x)
+{
+    return x;
+}
+static inline int32_t btoi_bool_i32(bool x)
+{
+    return x;
+}
+static inline int64_t btoi_bool_i64(bool x)
+{
+    return x;
+}
+#define sext_i8_i8(x) ((int8_t) (int8_t) x)
+#define sext_i8_i16(x) ((int16_t) (int8_t) x)
+#define sext_i8_i32(x) ((int32_t) (int8_t) x)
+#define sext_i8_i64(x) ((int64_t) (int8_t) x)
+#define sext_i16_i8(x) ((int8_t) (int16_t) x)
+#define sext_i16_i16(x) ((int16_t) (int16_t) x)
+#define sext_i16_i32(x) ((int32_t) (int16_t) x)
+#define sext_i16_i64(x) ((int64_t) (int16_t) x)
+#define sext_i32_i8(x) ((int8_t) (int32_t) x)
+#define sext_i32_i16(x) ((int16_t) (int32_t) x)
+#define sext_i32_i32(x) ((int32_t) (int32_t) x)
+#define sext_i32_i64(x) ((int64_t) (int32_t) x)
+#define sext_i64_i8(x) ((int8_t) (int64_t) x)
+#define sext_i64_i16(x) ((int16_t) (int64_t) x)
+#define sext_i64_i32(x) ((int32_t) (int64_t) x)
+#define sext_i64_i64(x) ((int64_t) (int64_t) x)
+#define zext_i8_i8(x) ((int8_t) (uint8_t) x)
+#define zext_i8_i16(x) ((int16_t) (uint8_t) x)
+#define zext_i8_i32(x) ((int32_t) (uint8_t) x)
+#define zext_i8_i64(x) ((int64_t) (uint8_t) x)
+#define zext_i16_i8(x) ((int8_t) (uint16_t) x)
+#define zext_i16_i16(x) ((int16_t) (uint16_t) x)
+#define zext_i16_i32(x) ((int32_t) (uint16_t) x)
+#define zext_i16_i64(x) ((int64_t) (uint16_t) x)
+#define zext_i32_i8(x) ((int8_t) (uint32_t) x)
+#define zext_i32_i16(x) ((int16_t) (uint32_t) x)
+#define zext_i32_i32(x) ((int32_t) (uint32_t) x)
+#define zext_i32_i64(x) ((int64_t) (uint32_t) x)
+#define zext_i64_i8(x) ((int8_t) (uint64_t) x)
+#define zext_i64_i16(x) ((int16_t) (uint64_t) x)
+#define zext_i64_i32(x) ((int32_t) (uint64_t) x)
+#define zext_i64_i64(x) ((int64_t) (uint64_t) x)
+#if defined(__OPENCL_VERSION__)
+static int32_t futrts_popc8(int8_t x)
+{
+    return popcount(x);
+}
+static int32_t futrts_popc16(int16_t x)
+{
+    return popcount(x);
+}
+static int32_t futrts_popc32(int32_t x)
+{
+    return popcount(x);
+}
+static int32_t futrts_popc64(int64_t x)
+{
+    return popcount(x);
+}
+#elif defined(__CUDA_ARCH__)
+static int32_t futrts_popc8(int8_t x)
+{
+    return __popc(zext_i8_i32(x));
+}
+static int32_t futrts_popc16(int16_t x)
+{
+    return __popc(zext_i16_i32(x));
+}
+static int32_t futrts_popc32(int32_t x)
+{
+    return __popc(x);
+}
+static int32_t futrts_popc64(int64_t x)
+{
+    return __popcll(x);
+}
+#else
+static int32_t futrts_popc8(int8_t x)
+{
+    int c = 0;
+    
+    for (; x; ++c)
+        x &= x - 1;
+    return c;
+}
+static int32_t futrts_popc16(int16_t x)
+{
+    int c = 0;
+    
+    for (; x; ++c)
+        x &= x - 1;
+    return c;
+}
+static int32_t futrts_popc32(int32_t x)
+{
+    int c = 0;
+    
+    for (; x; ++c)
+        x &= x - 1;
+    return c;
+}
+static int32_t futrts_popc64(int64_t x)
+{
+    int c = 0;
+    
+    for (; x; ++c)
+        x &= x - 1;
+    return c;
+}
+#endif
+#if defined(__OPENCL_VERSION__)
+static uint8_t futrts_mul_hi8(uint8_t a, uint8_t b)
+{
+    return mul_hi(a, b);
+}
+static uint16_t futrts_mul_hi16(uint16_t a, uint16_t b)
+{
+    return mul_hi(a, b);
+}
+static uint32_t futrts_mul_hi32(uint32_t a, uint32_t b)
+{
+    return mul_hi(a, b);
+}
+static uint64_t futrts_mul_hi64(uint64_t a, uint64_t b)
+{
+    return mul_hi(a, b);
+}
+#elif defined(__CUDA_ARCH__)
+static uint8_t futrts_mul_hi8(uint8_t a, uint8_t b)
+{
+    uint16_t aa = a;
+    uint16_t bb = b;
+    
+    return aa * bb >> 8;
+}
+static uint16_t futrts_mul_hi16(uint16_t a, uint16_t b)
+{
+    uint32_t aa = a;
+    uint32_t bb = b;
+    
+    return aa * bb >> 16;
+}
+static uint32_t futrts_mul_hi32(uint32_t a, uint32_t b)
+{
+    return mulhi(a, b);
+}
+static uint64_t futrts_mul_hi64(uint64_t a, uint64_t b)
+{
+    return mul64hi(a, b);
+}
+#else
+static uint8_t futrts_mul_hi8(uint8_t a, uint8_t b)
+{
+    uint16_t aa = a;
+    uint16_t bb = b;
+    
+    return aa * bb >> 8;
+}
+static uint16_t futrts_mul_hi16(uint16_t a, uint16_t b)
+{
+    uint32_t aa = a;
+    uint32_t bb = b;
+    
+    return aa * bb >> 16;
+}
+static uint32_t futrts_mul_hi32(uint32_t a, uint32_t b)
+{
+    uint64_t aa = a;
+    uint64_t bb = b;
+    
+    return aa * bb >> 32;
+}
+static uint64_t futrts_mul_hi64(uint64_t a, uint64_t b)
+{
+    __uint128_t aa = a;
+    __uint128_t bb = b;
+    
+    return aa * bb >> 64;
+}
+#endif
+#if defined(__OPENCL_VERSION__)
+static uint8_t futrts_mad_hi8(uint8_t a, uint8_t b, uint8_t c)
+{
+    return mad_hi(a, b, c);
+}
+static uint16_t futrts_mad_hi16(uint16_t a, uint16_t b, uint16_t c)
+{
+    return mad_hi(a, b, c);
+}
+static uint32_t futrts_mad_hi32(uint32_t a, uint32_t b, uint32_t c)
+{
+    return mad_hi(a, b, c);
+}
+static uint64_t futrts_mad_hi64(uint64_t a, uint64_t b, uint64_t c)
+{
+    return mad_hi(a, b, c);
+}
+#else
+static uint8_t futrts_mad_hi8(uint8_t a, uint8_t b, uint8_t c)
+{
+    return futrts_mul_hi8(a, b) + c;
+}
+static uint16_t futrts_mad_hi16(uint16_t a, uint16_t b, uint16_t c)
+{
+    return futrts_mul_hi16(a, b) + c;
+}
+static uint32_t futrts_mad_hi32(uint32_t a, uint32_t b, uint32_t c)
+{
+    return futrts_mul_hi32(a, b) + c;
+}
+static uint64_t futrts_mad_hi64(uint64_t a, uint64_t b, uint64_t c)
+{
+    return futrts_mul_hi64(a, b) + c;
+}
+#endif
+#if defined(__OPENCL_VERSION__)
+static int32_t futrts_clzz8(int8_t x)
+{
+    return clz(x);
+}
+static int32_t futrts_clzz16(int16_t x)
+{
+    return clz(x);
+}
+static int32_t futrts_clzz32(int32_t x)
+{
+    return clz(x);
+}
+static int32_t futrts_clzz64(int64_t x)
+{
+    return clz(x);
+}
+#elif defined(__CUDA_ARCH__)
+static int32_t futrts_clzz8(int8_t x)
+{
+    return __clz(zext_i8_i32(x)) - 24;
+}
+static int32_t futrts_clzz16(int16_t x)
+{
+    return __clz(zext_i16_i32(x)) - 16;
+}
+static int32_t futrts_clzz32(int32_t x)
+{
+    return __clz(x);
+}
+static int32_t futrts_clzz64(int64_t x)
+{
+    return __clzll(x);
+}
+#else
+static int32_t futrts_clzz8(int8_t x)
+{
+    int n = 0;
+    int bits = sizeof(x) * 8;
+    
+    for (int i = 0; i < bits; i++) {
+        if (x < 0)
+            break;
+        n++;
+        x <<= 1;
+    }
+    return n;
+}
+static int32_t futrts_clzz16(int16_t x)
+{
+    int n = 0;
+    int bits = sizeof(x) * 8;
+    
+    for (int i = 0; i < bits; i++) {
+        if (x < 0)
+            break;
+        n++;
+        x <<= 1;
+    }
+    return n;
+}
+static int32_t futrts_clzz32(int32_t x)
+{
+    int n = 0;
+    int bits = sizeof(x) * 8;
+    
+    for (int i = 0; i < bits; i++) {
+        if (x < 0)
+            break;
+        n++;
+        x <<= 1;
+    }
+    return n;
+}
+static int32_t futrts_clzz64(int64_t x)
+{
+    int n = 0;
+    int bits = sizeof(x) * 8;
+    
+    for (int i = 0; i < bits; i++) {
+        if (x < 0)
+            break;
+        n++;
+        x <<= 1;
+    }
+    return n;
+}
+#endif
+#if defined(__OPENCL_VERSION__)
+static int32_t futrts_ctzz8(int8_t x)
+{
+    int i = 0;
+    
+    for (; i < 8 && (x & 1) == 0; i++, x >>= 1)
+        ;
+    return i;
+}
+static int32_t futrts_ctzz16(int16_t x)
+{
+    int i = 0;
+    
+    for (; i < 16 && (x & 1) == 0; i++, x >>= 1)
+        ;
+    return i;
+}
+static int32_t futrts_ctzz32(int32_t x)
+{
+    int i = 0;
+    
+    for (; i < 32 && (x & 1) == 0; i++, x >>= 1)
+        ;
+    return i;
+}
+static int32_t futrts_ctzz64(int64_t x)
+{
+    int i = 0;
+    
+    for (; i < 64 && (x & 1) == 0; i++, x >>= 1)
+        ;
+    return i;
+}
+#elif defined(__CUDA_ARCH__)
+static int32_t futrts_ctzz8(int8_t x)
+{
+    int y = __ffs(x);
+    
+    return y == 0 ? 8 : y - 1;
+}
+static int32_t futrts_ctzz16(int16_t x)
+{
+    int y = __ffs(x);
+    
+    return y == 0 ? 16 : y - 1;
+}
+static int32_t futrts_ctzz32(int32_t x)
+{
+    int y = __ffs(x);
+    
+    return y == 0 ? 32 : y - 1;
+}
+static int32_t futrts_ctzz64(int64_t x)
+{
+    int y = __ffsll(x);
+    
+    return y == 0 ? 64 : y - 1;
+}
+#else
+static int32_t futrts_ctzz8(int8_t x)
+{
+    return x == 0 ? 8 : __builtin_ctz((uint32_t) x);
+}
+static int32_t futrts_ctzz16(int16_t x)
+{
+    return x == 0 ? 16 : __builtin_ctz((uint32_t) x);
+}
+static int32_t futrts_ctzz32(int32_t x)
+{
+    return x == 0 ? 32 : __builtin_ctz(x);
+}
+static int32_t futrts_ctzz64(int64_t x)
+{
+    return x == 0 ? 64 : __builtin_ctzll(x);
+}
+#endif
+static inline float fdiv32(float x, float y)
+{
+    return x / y;
+}
+static inline float fadd32(float x, float y)
+{
+    return x + y;
+}
+static inline float fsub32(float x, float y)
+{
+    return x - y;
+}
+static inline float fmul32(float x, float y)
+{
+    return x * y;
+}
+static inline float fmin32(float x, float y)
+{
+    return fmin(x, y);
+}
+static inline float fmax32(float x, float y)
+{
+    return fmax(x, y);
+}
+static inline float fpow32(float x, float y)
+{
+    return pow(x, y);
+}
+static inline bool cmplt32(float x, float y)
+{
+    return x < y;
+}
+static inline bool cmple32(float x, float y)
+{
+    return x <= y;
+}
+static inline float sitofp_i8_f32(int8_t x)
+{
+    return (float) x;
+}
+static inline float sitofp_i16_f32(int16_t x)
+{
+    return (float) x;
+}
+static inline float sitofp_i32_f32(int32_t x)
+{
+    return (float) x;
+}
+static inline float sitofp_i64_f32(int64_t x)
+{
+    return (float) x;
+}
+static inline float uitofp_i8_f32(uint8_t x)
+{
+    return (float) x;
+}
+static inline float uitofp_i16_f32(uint16_t x)
+{
+    return (float) x;
+}
+static inline float uitofp_i32_f32(uint32_t x)
+{
+    return (float) x;
+}
+static inline float uitofp_i64_f32(uint64_t x)
+{
+    return (float) x;
+}
+static inline int8_t fptosi_f32_i8(float x)
+{
+    return (int8_t) x;
+}
+static inline int16_t fptosi_f32_i16(float x)
+{
+    return (int16_t) x;
+}
+static inline int32_t fptosi_f32_i32(float x)
+{
+    return (int32_t) x;
+}
+static inline int64_t fptosi_f32_i64(float x)
+{
+    return (int64_t) x;
+}
+static inline uint8_t fptoui_f32_i8(float x)
+{
+    return (uint8_t) x;
+}
+static inline uint16_t fptoui_f32_i16(float x)
+{
+    return (uint16_t) x;
+}
+static inline uint32_t fptoui_f32_i32(float x)
+{
+    return (uint32_t) x;
+}
+static inline uint64_t fptoui_f32_i64(float x)
+{
+    return (uint64_t) x;
+}
+static inline double fdiv64(double x, double y)
+{
+    return x / y;
+}
+static inline double fadd64(double x, double y)
+{
+    return x + y;
+}
+static inline double fsub64(double x, double y)
+{
+    return x - y;
+}
+static inline double fmul64(double x, double y)
+{
+    return x * y;
+}
+static inline double fmin64(double x, double y)
+{
+    return fmin(x, y);
+}
+static inline double fmax64(double x, double y)
+{
+    return fmax(x, y);
+}
+static inline double fpow64(double x, double y)
+{
+    return pow(x, y);
+}
+static inline bool cmplt64(double x, double y)
+{
+    return x < y;
+}
+static inline bool cmple64(double x, double y)
+{
+    return x <= y;
+}
+static inline double sitofp_i8_f64(int8_t x)
+{
+    return (double) x;
+}
+static inline double sitofp_i16_f64(int16_t x)
+{
+    return (double) x;
+}
+static inline double sitofp_i32_f64(int32_t x)
+{
+    return (double) x;
+}
+static inline double sitofp_i64_f64(int64_t x)
+{
+    return (double) x;
+}
+static inline double uitofp_i8_f64(uint8_t x)
+{
+    return (double) x;
+}
+static inline double uitofp_i16_f64(uint16_t x)
+{
+    return (double) x;
+}
+static inline double uitofp_i32_f64(uint32_t x)
+{
+    return (double) x;
+}
+static inline double uitofp_i64_f64(uint64_t x)
+{
+    return (double) x;
+}
+static inline int8_t fptosi_f64_i8(double x)
+{
+    return (int8_t) x;
+}
+static inline int16_t fptosi_f64_i16(double x)
+{
+    return (int16_t) x;
+}
+static inline int32_t fptosi_f64_i32(double x)
+{
+    return (int32_t) x;
+}
+static inline int64_t fptosi_f64_i64(double x)
+{
+    return (int64_t) x;
+}
+static inline uint8_t fptoui_f64_i8(double x)
+{
+    return (uint8_t) x;
+}
+static inline uint16_t fptoui_f64_i16(double x)
+{
+    return (uint16_t) x;
+}
+static inline uint32_t fptoui_f64_i32(double x)
+{
+    return (uint32_t) x;
+}
+static inline uint64_t fptoui_f64_i64(double x)
+{
+    return (uint64_t) x;
+}
+static inline float fpconv_f32_f32(float x)
+{
+    return (float) x;
+}
+static inline double fpconv_f32_f64(float x)
+{
+    return (double) x;
+}
+static inline float fpconv_f64_f32(double x)
+{
+    return (float) x;
+}
+static inline double fpconv_f64_f64(double x)
+{
+    return (double) x;
+}
+static inline float futrts_log32(float x)
+{
+    return log(x);
+}
+static inline float futrts_log2_32(float x)
+{
+    return log2(x);
+}
+static inline float futrts_log10_32(float x)
+{
+    return log10(x);
+}
+static inline float futrts_sqrt32(float x)
+{
+    return sqrt(x);
+}
+static inline float futrts_exp32(float x)
+{
+    return exp(x);
+}
+static inline float futrts_cos32(float x)
+{
+    return cos(x);
+}
+static inline float futrts_sin32(float x)
+{
+    return sin(x);
+}
+static inline float futrts_tan32(float x)
+{
+    return tan(x);
+}
+static inline float futrts_acos32(float x)
+{
+    return acos(x);
+}
+static inline float futrts_asin32(float x)
+{
+    return asin(x);
+}
+static inline float futrts_atan32(float x)
+{
+    return atan(x);
+}
+static inline float futrts_cosh32(float x)
+{
+    return cosh(x);
+}
+static inline float futrts_sinh32(float x)
+{
+    return sinh(x);
+}
+static inline float futrts_tanh32(float x)
+{
+    return tanh(x);
+}
+static inline float futrts_acosh32(float x)
+{
+    return acosh(x);
+}
+static inline float futrts_asinh32(float x)
+{
+    return asinh(x);
+}
+static inline float futrts_atanh32(float x)
+{
+    return atanh(x);
+}
+static inline float futrts_atan2_32(float x, float y)
+{
+    return atan2(x, y);
+}
+static inline float futrts_gamma32(float x)
+{
+    return tgamma(x);
+}
+static inline float futrts_lgamma32(float x)
+{
+    return lgamma(x);
+}
+static inline bool futrts_isnan32(float x)
+{
+    return isnan(x);
+}
+static inline bool futrts_isinf32(float x)
+{
+    return isinf(x);
+}
+static inline int32_t futrts_to_bits32(float x)
+{
+    union {
+        float f;
+        int32_t t;
+    } p;
+    
+    p.f = x;
+    return p.t;
+}
+static inline float futrts_from_bits32(int32_t x)
+{
+    union {
+        int32_t f;
+        float t;
+    } p;
+    
+    p.f = x;
+    return p.t;
+}
+#ifdef __OPENCL_VERSION__
+static inline float fmod32(float x, float y)
+{
+    return fmod(x, y);
+}
+static inline float futrts_round32(float x)
+{
+    return rint(x);
+}
+static inline float futrts_floor32(float x)
+{
+    return floor(x);
+}
+static inline float futrts_ceil32(float x)
+{
+    return ceil(x);
+}
+static inline float futrts_lerp32(float v0, float v1, float t)
+{
+    return mix(v0, v1, t);
+}
+static inline float futrts_mad32(float a, float b, float c)
+{
+    return mad(a, b, c);
+}
+static inline float futrts_fma32(float a, float b, float c)
+{
+    return fma(a, b, c);
+}
+#else
+static inline float fmod32(float x, float y)
+{
+    return fmodf(x, y);
+}
+static inline float futrts_round32(float x)
+{
+    return rintf(x);
+}
+static inline float futrts_floor32(float x)
+{
+    return floorf(x);
+}
+static inline float futrts_ceil32(float x)
+{
+    return ceilf(x);
+}
+static inline float futrts_lerp32(float v0, float v1, float t)
+{
+    return v0 + (v1 - v0) * t;
+}
+static inline float futrts_mad32(float a, float b, float c)
+{
+    return a * b + c;
+}
+static inline float futrts_fma32(float a, float b, float c)
+{
+    return fmaf(a, b, c);
+}
+#endif
+static inline double futrts_log64(double x)
+{
+    return log(x);
+}
+static inline double futrts_log2_64(double x)
+{
+    return log2(x);
+}
+static inline double futrts_log10_64(double x)
+{
+    return log10(x);
+}
+static inline double futrts_sqrt64(double x)
+{
+    return sqrt(x);
+}
+static inline double futrts_exp64(double x)
+{
+    return exp(x);
+}
+static inline double futrts_cos64(double x)
+{
+    return cos(x);
+}
+static inline double futrts_sin64(double x)
+{
+    return sin(x);
+}
+static inline double futrts_tan64(double x)
+{
+    return tan(x);
+}
+static inline double futrts_acos64(double x)
+{
+    return acos(x);
+}
+static inline double futrts_asin64(double x)
+{
+    return asin(x);
+}
+static inline double futrts_atan64(double x)
+{
+    return atan(x);
+}
+static inline double futrts_cosh64(double x)
+{
+    return cosh(x);
+}
+static inline double futrts_sinh64(double x)
+{
+    return sinh(x);
+}
+static inline double futrts_tanh64(double x)
+{
+    return tanh(x);
+}
+static inline double futrts_acosh64(double x)
+{
+    return acosh(x);
+}
+static inline double futrts_asinh64(double x)
+{
+    return asinh(x);
+}
+static inline double futrts_atanh64(double x)
+{
+    return atanh(x);
+}
+static inline double futrts_atan2_64(double x, double y)
+{
+    return atan2(x, y);
+}
+static inline double futrts_gamma64(double x)
+{
+    return tgamma(x);
+}
+static inline double futrts_lgamma64(double x)
+{
+    return lgamma(x);
+}
+static inline double futrts_fma64(double a, double b, double c)
+{
+    return fma(a, b, c);
+}
+static inline double futrts_round64(double x)
+{
+    return rint(x);
+}
+static inline double futrts_ceil64(double x)
+{
+    return ceil(x);
+}
+static inline double futrts_floor64(double x)
+{
+    return floor(x);
+}
+static inline bool futrts_isnan64(double x)
+{
+    return isnan(x);
+}
+static inline bool futrts_isinf64(double x)
+{
+    return isinf(x);
+}
+static inline int64_t futrts_to_bits64(double x)
+{
+    union {
+        double f;
+        int64_t t;
+    } p;
+    
+    p.f = x;
+    return p.t;
+}
+static inline double futrts_from_bits64(int64_t x)
+{
+    union {
+        int64_t f;
+        double t;
+    } p;
+    
+    p.f = x;
+    return p.t;
+}
+static inline double fmod64(double x, double y)
+{
+    return fmod(x, y);
+}
+#ifdef __OPENCL_VERSION__
+static inline double futrts_lerp64(double v0, double v1, double t)
+{
+    return mix(v0, v1, t);
+}
+static inline double futrts_mad64(double a, double b, double c)
+{
+    return mad(a, b, c);
+}
+#else
+static inline double futrts_lerp64(double v0, double v1, double t)
+{
+    return v0 + (v1 - v0) * t;
+}
+static inline double futrts_mad64(double a, double b, double c)
+{
+    return a * b + c;
+}
+#endif
+static int init_constants(struct futhark_context *);
+static int free_constants(struct futhark_context *);
+struct memblock {
+    int *references;
+    char *mem;
+    int64_t size;
+    const char *desc;
+} ;
+struct futhark_context_config {
+    int debugging;
+} ;
+struct futhark_context_config *futhark_context_config_new(void)
+{
+    struct futhark_context_config *cfg =
+                                  (struct futhark_context_config *) malloc(sizeof(struct futhark_context_config));
+    
+    if (cfg == NULL)
+        return NULL;
+    cfg->debugging = 0;
+    return cfg;
+}
+void futhark_context_config_free(struct futhark_context_config *cfg)
+{
+    free(cfg);
+}
+void futhark_context_config_set_debugging(struct futhark_context_config *cfg,
+                                          int detail)
+{
+    cfg->debugging = detail;
+}
+void futhark_context_config_set_logging(struct futhark_context_config *cfg,
+                                        int detail)
+{
+    /* Does nothing for this backend. */
+    (void) cfg;
+    (void) detail;
+}
+struct futhark_context {
+    int detail_memory;
+    int debugging;
+    int profiling;
+    lock_t lock;
+    char *error;
+    int profiling_paused;
+    int64_t peak_mem_usage_default;
+    int64_t cur_mem_usage_default;
+    struct {
+        int dummy;
+    } constants;
+} ;
+struct futhark_context *futhark_context_new(struct futhark_context_config *cfg)
+{
+    struct futhark_context *ctx =
+                           (struct futhark_context *) malloc(sizeof(struct futhark_context));
+    
+    if (ctx == NULL)
+        return NULL;
+    ctx->detail_memory = cfg->debugging;
+    ctx->debugging = cfg->debugging;
+    ctx->profiling = cfg->debugging;
+    ctx->error = NULL;
+    create_lock(&ctx->lock);
+    ctx->peak_mem_usage_default = 0;
+    ctx->cur_mem_usage_default = 0;
+    init_constants(ctx);
+    return ctx;
+}
+void futhark_context_free(struct futhark_context *ctx)
+{
+    free_constants(ctx);
+    free_lock(&ctx->lock);
+    free(ctx);
+}
+int futhark_context_sync(struct futhark_context *ctx)
+{
+    (void) ctx;
+    return 0;
+}
+int futhark_context_clear_caches(struct futhark_context *ctx)
+{
+    (void) ctx;
+    return 0;
+}
+static int memblock_unref(struct futhark_context *ctx, struct memblock *block,
+                          const char *desc)
+{
+    if (block->references != NULL) {
+        *block->references -= 1;
+        if (ctx->detail_memory)
+            fprintf(stderr,
+                    "Unreferencing block %s (allocated as %s) in %s: %d references remaining.\n",
+                    desc, block->desc, "default space", *block->references);
+        if (*block->references == 0) {
+            ctx->cur_mem_usage_default -= block->size;
+            free(block->mem);
+            free(block->references);
+            if (ctx->detail_memory)
+                fprintf(stderr,
+                        "%lld bytes freed (now allocated: %lld bytes)\n",
+                        (long long) block->size,
+                        (long long) ctx->cur_mem_usage_default);
+        }
+        block->references = NULL;
+    }
+    return 0;
+}
+static int memblock_alloc(struct futhark_context *ctx, struct memblock *block,
+                          int64_t size, const char *desc)
+{
+    if (size < 0)
+        futhark_panic(1,
+                      "Negative allocation of %lld bytes attempted for %s in %s.\n",
+                      (long long) size, desc, "default space",
+                      ctx->cur_mem_usage_default);
+    
+    int ret = memblock_unref(ctx, block, desc);
+    
+    ctx->cur_mem_usage_default += size;
+    if (ctx->detail_memory)
+        fprintf(stderr,
+                "Allocating %lld bytes for %s in %s (then allocated: %lld bytes)",
+                (long long) size, desc, "default space",
+                (long long) ctx->cur_mem_usage_default);
+    if (ctx->cur_mem_usage_default > ctx->peak_mem_usage_default) {
+        ctx->peak_mem_usage_default = ctx->cur_mem_usage_default;
+        if (ctx->detail_memory)
+            fprintf(stderr, " (new peak).\n");
+    } else if (ctx->detail_memory)
+        fprintf(stderr, ".\n");
+    block->mem = (char *) malloc(size);
+    block->references = (int *) malloc(sizeof(int));
+    *block->references = 1;
+    block->size = size;
+    block->desc = desc;
+    return ret;
+}
+static int memblock_set(struct futhark_context *ctx, struct memblock *lhs,
+                        struct memblock *rhs, const char *lhs_desc)
+{
+    int ret = memblock_unref(ctx, lhs, lhs_desc);
+    
+    if (rhs->references != NULL)
+        (*rhs->references)++;
+    *lhs = *rhs;
+    return ret;
+}
+char *futhark_context_report(struct futhark_context *ctx)
+{
+    struct str_builder builder;
+    
+    str_builder_init(&builder);
+    if (ctx->detail_memory || ctx->profiling) {
+        { }
+    }
+    if (ctx->profiling) { }
+    return builder.str;
+}
+char *futhark_context_get_error(struct futhark_context *ctx)
+{
+    char *error = ctx->error;
+    
+    ctx->error = NULL;
+    return error;
+}
+void futhark_context_pause_profiling(struct futhark_context *ctx)
+{
+    ctx->profiling_paused = 1;
+}
+void futhark_context_unpause_profiling(struct futhark_context *ctx)
+{
+    ctx->profiling_paused = 0;
+}
+static int init_constants(struct futhark_context *ctx)
+{
+    (void) ctx;
+    
+    int err = 0;
+    
+    
+  cleanup:
+    return err;
+}
+static int free_constants(struct futhark_context *ctx)
+{
+    (void) ctx;
+    return 0;
+}
diff --git a/presentations/pasc/code/gfx.c b/presentations/pasc/code/gfx.c
new file mode 100644
index 0000000000000000000000000000000000000000..e5bc36e4d2eeaae679a9b4c96de5200c43effac5
--- /dev/null
+++ b/presentations/pasc/code/gfx.c
@@ -0,0 +1,98 @@
+#ifdef PTK_SDL
+
+/// @file gfx.c
+/// @author Florent Gluck
+/// @date November 6, 2016
+/// Helper routines to render pixels in fullscreen graphic mode.
+/// Uses the SDL2 library.
+
+#include "gfx.h"
+
+/// Create a fullscreen graphic window.
+/// @param title Title of the window.
+/// @param width Width of the window in pixels.
+/// @param height Height of the window in pixels.
+/// @return a pointer to the graphic context or NULL if it failed.
+struct gfx_context_t* gfx_create(char *title, uint width, uint height) {
+	if (SDL_Init(SDL_INIT_VIDEO) != 0) goto error;
+	SDL_Window *window = SDL_CreateWindow(title, SDL_WINDOWPOS_CENTERED,
+			SDL_WINDOWPOS_CENTERED, width, height, SDL_WINDOW_RESIZABLE);
+	SDL_Renderer *renderer = SDL_CreateRenderer(window, -1, 0);
+	SDL_Texture *texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888,
+			SDL_TEXTUREACCESS_STREAMING, width, height);
+	uint32_t *pixels = malloc(width*height*sizeof(uint32_t));
+	struct gfx_context_t *ctxt = malloc(sizeof(struct gfx_context_t));
+
+	if (!window || !renderer || !texture || !pixels || !ctxt) goto error;
+
+	ctxt->renderer = renderer;
+	ctxt->texture = texture;
+	ctxt->window = window;
+	ctxt->width = width;
+	ctxt->height = height;
+	ctxt->pixels = pixels;
+
+	SDL_ShowCursor(SDL_DISABLE);
+	gfx_clear(ctxt, COLOR_BLACK);
+	return ctxt;
+
+error:
+	return NULL;
+}
+
+/// Draw a pixel in the specified graphic context.
+/// @param ctxt Graphic context where the pixel is to be drawn.
+/// @param x X coordinate of the pixel.
+/// @param y Y coordinate of the pixel.
+/// @param color Color of the pixel.
+void gfx_putpixel(struct gfx_context_t *ctxt, int x, int y, uint32_t color) {
+	if (x < ctxt->width && y < ctxt->height)
+		ctxt->pixels[ctxt->width*y+x] = color;
+}
+
+/// Clear the specified graphic context.
+/// @param ctxt Graphic context to clear.
+/// @param color Color to use.
+void gfx_clear(struct gfx_context_t *ctxt, uint32_t color) {
+	int n = ctxt->width*ctxt->height;
+	while (n)
+		ctxt->pixels[--n] = color;
+}
+
+/// Display the graphic context.
+/// @param ctxt Graphic context to clear.
+void gfx_present(struct gfx_context_t *ctxt) {
+	SDL_UpdateTexture(ctxt->texture, NULL, ctxt->pixels, ctxt->width*sizeof(uint32_t));
+	SDL_RenderCopy(ctxt->renderer, ctxt->texture, NULL, NULL);
+	SDL_RenderPresent(ctxt->renderer);
+}
+
+/// Destroy a graphic window.
+/// @param ctxt Graphic context of the window to close.
+void gfx_destroy(struct gfx_context_t *ctxt) {
+	SDL_ShowCursor(SDL_ENABLE);
+	SDL_DestroyTexture(ctxt->texture);
+	SDL_DestroyRenderer(ctxt->renderer);
+	SDL_DestroyWindow(ctxt->window);
+	free(ctxt->pixels);
+	ctxt->texture = NULL;
+	ctxt->renderer = NULL;
+	ctxt->window = NULL;
+	ctxt->pixels = NULL;
+	SDL_Quit();
+	free(ctxt);
+}
+
+/// If a key was pressed, returns its key code (non blocking call).
+/// List of key codes: https://wiki.libsdl.org/SDL_Keycode
+/// SDL_PumpEvents() must be called before.
+/// @return 0 if escape was not pressed.
+SDL_Keycode gfx_keypressed() {
+	const Uint8 *state = SDL_GetKeyboardState(NULL);
+	if (state && state[SDL_SCANCODE_ESCAPE]) {
+		return SDLK_ESCAPE;
+	}
+	return 0;
+}
+
+#endif
\ No newline at end of file
diff --git a/presentations/pasc/code/gfx.h b/presentations/pasc/code/gfx.h
new file mode 100644
index 0000000000000000000000000000000000000000..fea1f9688660a3c129ac9a2a944eec4a048acd79
--- /dev/null
+++ b/presentations/pasc/code/gfx.h
@@ -0,0 +1,42 @@
+#ifdef PTK_SDL
+
+#ifndef _GFX_H_
+#define _GFX_H_
+
+#include <stdint.h>
+#include <SDL2/SDL.h>
+
+#define MAKE_COLOR(r,g,b) ((uint32_t)b|((uint32_t)g<<8)|((uint32_t)r<<16))
+
+#define COLOR_BLACK  0x00000000
+#define COLOR_RED    0x00FF0000
+#define COLOR_GREEN  0x0000FF00
+#define COLOR_BLUE   0x000000FF
+#define COLOR_WHITE  0x00FFFFFF
+#define COLOR_YELLOW 0x00FFFF00
+
+#define MAXCOLOR 255
+
+typedef unsigned int  uint;
+typedef unsigned long ulong;
+typedef unsigned char uchar;
+
+struct gfx_context_t {
+	SDL_Window *window;
+	SDL_Renderer *renderer;
+	SDL_Texture *texture;
+	uint32_t *pixels;
+	int width;
+	int height;
+};
+
+extern void gfx_putpixel(struct gfx_context_t *ctxt, int x, int y, uint32_t color);
+extern void gfx_clear(struct gfx_context_t *ctxt, uint32_t color);
+extern struct gfx_context_t* gfx_create(char *text, uint width, uint height);
+extern void gfx_destroy(struct gfx_context_t *ctxt);
+extern void gfx_present(struct gfx_context_t *ctxt);
+extern SDL_Keycode gfx_keypressed();
+
+#endif
+
+#endif
diff --git a/presentations/pasc/code/lbm.c b/presentations/pasc/code/lbm.c
new file mode 100644
index 0000000000000000000000000000000000000000..cba3802ee21ed156dbe9d24a2841d47935770481
--- /dev/null
+++ b/presentations/pasc/code/lbm.c
@@ -0,0 +1,337 @@
+#include "liblbm.h"
+#include "gfx.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <time.h>
+#include <float.h>
+#include <math.h>
+
+#define q 9
+#define d 2
+
+typedef float T;
+
+const float w[q] = {4.0 / 9.0, 1.0 / 36.0, 1.0 / 9.0, 1.0 / 36.0, 1.0 / 9.0, 1.0 / 36.0, 1.0 / 9.0, 1.0 / 36.0, 1.0 / 9.0};
+const int c[d][q] = { {0, -1, -1, -1,  0, +1, +1, +1, 0},
+                      {0, +1,  0, -1, -1, -1,  0, +1,+1} };
+
+/**
+ * Print how the program is supposed to be called
+ */
+void usage() {
+    printf("%s\n%s\n%s\n%s\n%s\n", "\nusage:\n	./lbm <nx> <ny> <tau> <iter>",
+         "where:", "	<nx> and <ny> are the mesh dimensions ( >= 10),",
+         "	<tau> is tau ( 1/2 < tau <= 2 )",
+         "	<iter> is the number of iterations");
+}
+
+float compute_max(float **field, int nx, int ny) {
+    float max = FLT_MIN;
+    for(int ix = 0; ix < nx; ++ix) {
+		for(int iy = 0; iy < ny; ++iy){
+            if (field[ix][iy] > max) {
+                max = field[ix][iy];
+            }
+        }
+    }
+    return max;
+}
+
+float compute_min(float **field, int nx, int ny) {
+    float min = FLT_MAX;
+    for(int ix = 0; ix < nx; ++ix) {
+		for(int iy = 0; iy < ny; ++iy){
+            if (field[ix][iy] < min) {
+                min = field[ix][iy];
+            }
+        }
+    }
+    return min;
+}
+
+float convert_to_zero_one(float num, float min, float max) {
+    return (num - min) / (max - min);
+}
+
+
+float ***allocate_3d(int nx, int ny, int nz) {
+    float ***f = (float ***)malloc(nx * sizeof(float **)); 
+    for (int ix = 0; ix < nx; ++ix) {
+        f[ix] = malloc(ny * sizeof(float *));
+        for (int iy = 0; iy < ny; ++iy) {
+            f[ix][iy] = malloc(nz * sizeof(float));
+        }
+    }
+    return f;
+}
+
+float compute_feq(float w, const int c[d], float rho, float u[d]) {
+    float c_u = c[0] * u[0];
+    float u_sqr = u[0] * u[0];
+    for (int id = 1; id < d; ++id) {
+        c_u   += c[id] * u[id];
+        u_sqr += u[id] * u[id];
+    }
+    return w * rho * (1.0 + 3.0 * c_u + 4.5 * c_u * c_u - 1.5 * u_sqr);
+}
+
+
+float ***allocate_and_ini_at_equilibrium(float rho, float u[d], int nx, int ny) {
+    float ***f = allocate_3d(nx, ny, q);
+    for (int ix = 0; ix < nx; ++ix) {
+        for (int iy = 0; iy < ny; ++iy) {
+            for (int ipop = 0; ipop < q; ++ipop) {
+                f[ix][iy][ipop] = compute_feq(w[ipop], c[ipop], rho, u);
+            }
+        }
+    }
+    return f;
+}
+
+float *allocate_and_ini_at_equilibrium_1d(float rho, float u[d], int nx, int ny) {
+    float *f = (float *)malloc(nx * ny * q * sizeof(float)); 
+    int i = 0;
+    for (int ix = 0; ix < nx; ++ix) {
+        for (int iy = 0; iy < ny; ++iy) {
+            float rho_ini = rho;
+            if (ix == nx / 4 && iy == ny / 4) {
+                rho_ini = 1.1;
+            } 
+            for (int ipop = 0; ipop < q; ++ipop) {
+                    f[i] = compute_feq(w[ipop], c[ipop], rho_ini, u);
+                    ++i;
+                }
+        }
+    }
+    return f;
+}
+
+void deallocate_3d(float ***f, int nx, int ny) {
+    for (int ix = 0; ix < nx; ++ix) {
+        for (int iy = 0; iy < ny; ++iy) {
+            free(f[ix][iy]); 
+        }
+        free(f[ix]); 
+    }
+    free(f);
+}
+
+float **allocate_2d(int nx, int ny) {
+    float **f = (float **)malloc(nx * sizeof(float *)); 
+    for (int ix = 0; ix < nx; ++ix) {
+        f[ix] = malloc(ny * sizeof(float));
+    }
+    return f;
+}
+
+void deallocate_2d(float **f, int nx) {
+    for (int ix = 0; ix < nx; ++ix) {
+        free(f[ix]); 
+    }
+    free(f);
+}
+
+float ***reshape_float_3d(int nx, int ny, int nz, struct futhark_f32_2d *f_in, struct futhark_context *ctx) {
+    float *data = malloc(nx * ny * nz * sizeof(float));
+    futhark_values_f32_2d(ctx, f_in, data);
+
+    float ***reshaped = allocate_3d(nx, ny, nz);
+
+    for (int ix = 0; ix < nx; ++ix) {
+        for (int iy = 0; iy < ny; ++iy) {
+            for (int iz = 0; iz < nz; ++iz) {
+                reshaped[ix][iy][iz] = data[iy + ny*(ix + nx*iz)];
+            }
+        }
+    }
+
+    return reshaped;
+}
+
+float ***compute_velocity(float ***f, int nx, int ny) {
+    float ***vel = allocate_3d(nx, ny, d);
+
+    for (int ix = 0; ix < nx; ++ix) {
+        for (int iy = 0; iy < ny; ++iy) {
+            float u[d] = {0.0, 0.0};
+            float rho = 0.0;
+            for (int ipop = 0; ipop < q; ++ipop) {
+                rho += f[ix][iy][ipop];
+                for (int id = 0; id < d; ++id) {
+                   u[id] += f[ix][iy][ipop] * c[id][ipop];
+                }
+            }
+            for (int id = 0; id < d; ++id) {
+                u[id] /= rho;
+                vel[ix][iy][id] = u[id];
+            }
+        }
+    }
+    return vel;
+}
+
+float **compute_density(float ***f, int nx, int ny) {
+    float **density = allocate_2d(nx, ny);
+
+    for (int ix = 0; ix < nx; ++ix) {
+        for (int iy = 0; iy < ny; ++iy) {
+            float rho = 0.0;
+            for (int ipop = 0; ipop < q; ++ipop) {
+                rho += f[ix][iy][ipop];
+            }
+            density[ix][iy] = rho;
+        }
+    }
+    return density;
+}
+
+float **compute_norm(float ***f, int nx, int ny, int nz) {
+    float **norm = allocate_2d(nx, ny);
+
+    for (int ix = 0; ix < nx; ++ix) {
+        for (int iy = 0; iy < ny; ++iy) {
+            float n = 0.0;
+            for (int id = 0; id < nz; ++id) {
+                n += f[ix][iy][id] * f[ix][iy][id];
+            }
+            norm[ix][iy] = sqrt(n);
+        }
+    }
+
+    return norm;
+}
+
+/**
+ * Ecriture du tableau des vitesses dans un fichier
+ **/
+void write_file_3d(float ***data, const int32_t nx, const int32_t ny, const int32_t nz, int i) {
+    // Create filename
+    char integer_string[10];
+    sprintf(integer_string, "%d", i);
+    char *filename = strcat(integer_string, ".txt");
+
+    // Open file
+    FILE *fp;
+    if ((fp = fopen(filename, "w")) == NULL) {
+        printf("Error ! opening file");
+        exit(EXIT_FAILURE);
+    }
+
+    // Fill file
+    for (int32_t x = 0; x < nx; x++) {
+        for (int32_t y = 0; y < ny; y++) {
+            for (int32_t z = 0; z < nz; z++) {
+                fprintf(fp, "%f ", data[x][y][z]);
+            }
+        }
+    }
+
+    fclose(fp);
+}
+
+#ifdef PTK_SDL
+void show_pixels(struct gfx_context_t* context, int nx, int ny, float **pixels) {
+    gfx_clear(context, COLOR_BLACK);
+
+    float min = compute_min(pixels, nx, ny);
+    float max = compute_max(pixels, nx, ny);
+
+    for(int ix = 0; ix < nx; ++ix) {
+		for(int iy = 0; iy < ny; ++iy){
+            int px = (int)(convert_to_zero_one(pixels[ix][iy], min, max) * MAXCOLOR);
+
+            uint32_t color = MAKE_COLOR(px, px, px);
+
+            gfx_putpixel(context, ix, iy, color);
+        }
+    }
+    gfx_present(context);
+}
+#endif
+
+
+int main(int argc, char **argv) {
+
+    // Mauvais nombre d'arguments
+    if (argc != 5) {
+        printf("Incorrect argument number\n");
+        usage();
+        return EXIT_FAILURE;
+    }
+    struct futhark_context_config *cfg = futhark_context_config_new();
+    struct futhark_context *ctx = futhark_context_new(cfg);
+    
+    const int32_t nx = atoi(argv[1]);
+    const int32_t ny = atoi(argv[2]);
+    const float tau = atof(argv[3]);
+    const int iter = atoi(argv[4]);
+
+#ifdef PTK_SDL
+    struct gfx_context_t* context = gfx_create("Porous medium", nx, ny);
+    if (!context) {
+        fprintf(stderr, "Graphic mode initialization failed!\n");
+        EXIT_FAILURE;
+    }
+    SDL_Keycode key_pressed = 0; // escape keyy needed
+#endif
+
+    // Norme des vitesses en chaque point du maillage (struct futhark et c)
+    float u[d] = {0.0, 0.0};
+    float *data = allocate_and_ini_at_equilibrium_1d(1.0, u, nx, ny);
+
+    struct futhark_f32_2d *f_1 = futhark_new_f32_2d(ctx,
+                                          data, nx*ny, q);
+
+    struct futhark_f32_2d *ff;
+
+    struct timespec start, finish;
+    clock_gettime(CLOCK_MONOTONIC, &start);
+    for (int it = 0; it < iter; ++it) {
+        futhark_entry_main(ctx, &f_1, nx, ny, 9, f_1, tau, 1);
+    }
+    clock_gettime(CLOCK_MONOTONIC, &finish);
+    float elapsed = (finish.tv_sec - start.tv_sec) * 1e6;
+    elapsed += (finish.tv_nsec - start.tv_nsec) / 1e3;
+    printf("elapsed microseconds = %f\n", elapsed);
+    printf("approximative MLUSP = %f\n", (float)nx * (float)ny * (float)iter / elapsed);
+
+    struct futhark_f32_2d *fout = f_1;
+    // if (f_1 == NULL) {
+    //     fout = f_2;
+    // }
+    int64_t *shape = futhark_shape_f32_2d(ctx, fout);
+    assert(nx*ny == shape[0] && q == shape[1]);
+
+    float ***f = reshape_float_3d(nx, ny, q, fout, ctx);
+    float **density = compute_density(f, nx, ny);
+    float ***vel = compute_velocity(f, nx, ny);
+    float **vel_norm = compute_norm(vel, nx, ny, d);
+    // write_file_3d(f, nx, ny, q, iter);
+#ifdef PTK_SDL
+    while(1) {
+        SDL_PumpEvents();
+
+        key_pressed = gfx_keypressed();
+        show_pixels(context, nx, ny, density);
+        if (key_pressed == SDLK_ESCAPE) {
+            break;
+        }
+    }
+    gfx_destroy(context);
+#endif
+
+
+    deallocate_3d(f, nx, ny);
+    deallocate_3d(vel, nx, ny);
+    deallocate_2d(vel_norm, nx);
+    deallocate_2d(density, nx);
+
+    free(fout);
+    free(data);
+
+
+
+    return EXIT_SUCCESS;
+}
diff --git a/presentations/pasc/code/lbm3d.c b/presentations/pasc/code/lbm3d.c
new file mode 100644
index 0000000000000000000000000000000000000000..1ebd1e1fdbcee33d520f47ddd74baa08f1082800
--- /dev/null
+++ b/presentations/pasc/code/lbm3d.c
@@ -0,0 +1,102 @@
+#include "liblbm.h"
+#include "c_interface/includes.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <time.h>
+#include <float.h>
+#include <math.h>
+
+/**
+ * Print how the program is supposed to be called
+ */
+void usage() {
+    printf("%s\n%s\n%s\n%s\n%s\n%s\n", "\nusage:\n	./lbm <n> <Re> <max_t> <prefix>",
+         "where:", "	<n> is the mesh dimension in each spatial direction ( >= 10),",
+         "\t<Re> is the Reynolds number",
+         "\t<max_t> is maximum simulation time in physical units.",
+         "\t<prefix> is the directory name where the output will be written.");
+}
+
+int main(int argc, char **argv) {
+
+    // Mauvais nombre d'arguments
+    if (argc != 5) {
+        printf("Incorrect argument number\n");
+        usage();
+        return EXIT_FAILURE;
+    }
+    struct futhark_context_config *cfg = futhark_context_config_new();
+    struct futhark_context *ctx = futhark_context_new(cfg);
+    
+    const fint n = atoi(argv[1]);
+    const T Re = atof(argv[2]);
+    const T end_time = atof(argv[3]);
+    const char *prefix = argv[4];
+
+    check_dir(prefix);
+
+    const T log_t = 1.0 / 24.0;
+    const T u_max = 0.05;
+
+
+    units_converter units = units_converter_create_inertial(Re * 2 * pi, 2 * pi, n, 1.0, u_max, 1.0, 1.0, 1.0);
+    units_converter_print(units);
+
+    const fint iter = (fint)(end_time / units.delta_t + 0.5);
+
+    // Norme des vitesses en chaque point du maillage (struct futhark et c)
+    T rho = 1.0;
+    tensor_field lattice = lbm_allocate_and_ini_at_equilibrium_kida(rho, u_max, n, n, n, 27);
+
+    struct futhark_f32_4d *f = tensor_field_to_futhark_f32_4d(lattice, ctx);
+
+    scalar_field density = lbm_compute_density(lattice);
+    tensor_field velocity = lbm_compute_velocity(lattice);
+
+    struct timespec start, finish;
+    fint delta_iter = max((fint)(log_t / units.delta_t + 0.5), 1);
+    clock_gettime(CLOCK_MONOTONIC, &start);
+    fint i = 0;
+    for (fint it = 0; it < iter; it += delta_iter) {
+        // {
+        //     tensor_field_from_futhark_f32_4d_inplace(f, ctx, lattice);
+        //     lbm_compute_density_inplace(lattice, density);
+        //     lbm_compute_velocity_inplace(lattice, velocity);
+
+        //     char *fname = vtk_create_fname(prefix, "out", it);
+
+        //     vtk_file vtk = vtk_file_create_default(fname, box_create(0, n-1, 0, n-1, 0, n-1), 1.0);
+        //     vtk_file_write_scalar_field(&vtk, density, "density");
+        //     vtk_file_write_tensor_field(&vtk, velocity, "velocity");
+        //     vtk_file_close(&vtk);
+            
+        //     free(fname);
+        // }
+
+        f = lbm_collide_and_stream(ctx, f, units.omega, delta_iter);
+        i += 1;
+    }
+    
+    futhark_context_sync(ctx);
+    clock_gettime(CLOCK_MONOTONIC, &finish);
+    T elapsed = (finish.tv_sec - start.tv_sec) * 1e6;
+    elapsed += (finish.tv_nsec - start.tv_nsec) / 1e3;
+    printf("elapsed microseconds = %f\n", elapsed);
+    printf("approximative MLUSP = %f\n", (T)n * (T)n * (T)n * (T)i * (T)delta_iter / elapsed);
+
+    
+
+    tensor_field_deallocate(lattice);
+    tensor_field_deallocate(velocity);
+    scalar_field_deallocate(density);
+
+    // futhark_free_f32_4d(ctx, f);
+    futhark_free_f32_4d(ctx, f);
+
+    futhark_context_config_free(cfg);
+    futhark_context_free(ctx);
+
+    return EXIT_SUCCESS;
+}
diff --git a/presentations/pasc/code/liblbm.fut b/presentations/pasc/code/liblbm.fut
new file mode 100644
index 0000000000000000000000000000000000000000..ea3c58b752e76969ab8870c815887ceea8586254
--- /dev/null
+++ b/presentations/pasc/code/liblbm.fut
@@ -0,0 +1,111 @@
+-- Program for Lattice-Boltzmann in Futhark
+-- Arrays everywhere
+
+-- Two useful functions for later
+let tabulate_4d 'a  (p: i64) (n: i64) (m: i64) (o: i64) (f: i64 -> i64 -> i64 -> i64 -> a): *[p][n][m][o]a =
+  map1 (f >-> tabulate_3d n m o) (iota p)
+
+let dotprod (xs: []f32) (ys: []f32): f32 =
+  reduce (+) 0 (map (\(x, y) -> x * y) (zip xs ys))
+
+------------------------------------
+------------ CONSTANTS -------------
+------------------------------------
+
+module d3q27 = {
+
+    let c: [][]f32 = [
+                [ 0, 0, 0],
+
+                [-1, 0, 0], [ 0,-1, 0], [ 0, 0,-1],
+                [-1,-1, 0], [-1, 1, 0], [-1, 0,-1],
+                [-1, 0, 1], [ 0,-1,-1], [ 0,-1, 1],
+                [-1,-1,-1], [-1,-1, 1], [-1, 1,-1], [-1, 1, 1],
+
+                [ 1, 0, 0], [ 0, 1, 0], [ 0, 0, 1],
+                [ 1, 1, 0], [ 1,-1, 0], [ 1, 0, 1],
+                [ 1, 0,-1], [ 0, 1, 1], [ 0, 1,-1],
+                [ 1, 1, 1], [ 1, 1,-1], [ 1,-1, 1], [ 1,-1,-1] ]
+
+    let w: []f32 = [
+                8.0/27,
+                2.0/27,  2.0/27,  2.0/27, 
+                1.0/54,  1.0/54,  1.0/54,
+                1.0/54,  1.0/54,  1.0/54,
+                1.0/216, 1.0/216, 1.0/216, 1.0/216,
+                2.0/27,  2.0/27,  2.0/27, 
+                1.0/54,  1.0/54,  1.0/54,
+                1.0/54,  1.0/54,  1.0/54,
+                1.0/216, 1.0/216, 1.0/216, 1.0/216
+            ]
+}
+
+let compute_rho [nx][ny][nz][q] (f: [nx][ny][nz][q]f32): [nx][ny][nz]f32 =
+  map(\fx ->
+    map(\fxy ->
+      map(\fxyz ->
+        reduce (+) 0 fxyz 
+      ) fxy
+    ) fx 
+  ) f
+
+let compute_j [nx][ny][nz][q][d] (f: [nx][ny][nz][q]f32)(c: [q][d]f32): [nx][ny][nz][d]f32 =
+  map(\fx ->
+    map(\fxy ->
+      map(\fxyz ->
+        map(\ci -> 
+          dotprod ci fxyz
+        ) (transpose c) -- intrinsic
+      ) fxy
+    ) fx 
+  ) f
+
+let compute_feq [nx][ny][nz][q][d] (w: [q]f32) (c: [q][d]f32)(rho: [nx][ny][nz]f32)(j: [nx][ny][nz][d]f32): [nx][ny][nz][q]f32 =
+  map2(\rho_x j_x ->
+    map2(\rho_xy j_xy ->
+      map2(\rho_xyz j_xyz ->
+        let u = map(\ji -> ji / rho_xyz) j_xyz
+        let u_sqr = dotprod u u 
+
+        in map2(\wi ci ->
+          let c_u = dotprod ci u
+          in wi * rho_xyz* (1 + 3*c_u + 4.5 * c_u * c_u - 1.5 * u_sqr)
+        ) w c
+      ) rho_xy j_xy
+    )rho_x j_x
+  ) rho j
+
+let collision [nx][ny][nz][q] (f: [nx][ny][nz][q]f32)(feq: [nx][ny][nz][q]f32) (omega: f32): [nx][ny][nz][q]f32 =
+  map2 (\fx feqx ->
+    map2(\fxy feqxy ->
+      map2(\fxyz feqxyz ->
+        map2(\fxyzi feqxyzi -> 
+          (1-omega) * fxyzi + omega * feqxyzi
+        ) fxyz feqxyz
+      ) fxy feqxy
+    ) fx feqx
+  ) f feq
+
+let collide [nx][ny][nz][q][d] (f: [nx][ny][nz][q]f32) (omega: f32) (w: [q]f32) (c: [q][d]f32): [nx][ny][nz][q]f32 =
+  let rho = compute_rho f
+  let j = compute_j f c
+  let feq = compute_feq w c rho j
+  
+  in collision f feq omega
+
+
+let streaming [nx][ny][nz][q][d] (f: [nx][ny][nz][q]f32) (c: [q][d]f32): [nx][ny][nz][q]f32 =
+    tabulate_4d nx ny nz q (\x y z i ->
+      let next_x = (x - (i64.f32 c[i,0]) + nx ) % nx
+      let next_y = (y - (i64.f32 c[i,1]) + ny ) % ny
+      let next_z = (z - (i64.f32 c[i,2]) + nz ) % nz
+
+      in f[next_x, next_y, next_z, i]
+    )
+
+entry main [nx][ny][nz] (fin: [nx][ny][nz][27]f32)(omega: f32) (max_t: i64): [nx][ny][nz][27]f32 =
+  let f = loop f = fin for _i < max_t do
+    streaming (collide f omega (copy d3q27.w) (copy d3q27.c)) (copy d3q27.c)
+  in f
+
+
diff --git a/presentations/pasc/default.latex b/presentations/pasc/default.latex
new file mode 100644
index 0000000000000000000000000000000000000000..4b9e09c2e796dfc683f4b0cbd7b9ee40fd640dd9
--- /dev/null
+++ b/presentations/pasc/default.latex
@@ -0,0 +1,288 @@
+\documentclass[$if(fontsize)$$fontsize$,$endif$$if(lang)$$babel-lang$,$endif$$if(papersize)$$papersize$paper,$endif$$for(classoption)$$classoption$$sep$,$endfor$]{$documentclass$}
+$if(beamerarticle)$
+\usepackage{beamerarticle} % needs to be loaded first
+$endif$
+$if(fontfamily)$
+\usepackage[$for(fontfamilyoptions)$$fontfamilyoptions$$sep$,$endfor$]{$fontfamily$}
+$else$
+\usepackage{lmodern}
+$endif$
+$if(linestretch)$
+\usepackage{setspace}
+\setstretch{$linestretch$}
+$endif$
+\usepackage{amssymb,amsmath,bm}
+\usepackage{ifxetex,ifluatex}
+\usepackage{fixltx2e} % provides \textsubscript
+\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
+  \usepackage[$if(fontenc)$$fontenc$$else$T1$endif$]{fontenc}
+  \usepackage[utf8]{inputenc}
+$if(euro)$
+  \usepackage{eurosym}
+$endif$
+\else % if luatex or xelatex
+$if(mathspec)$
+  \ifxetex
+    \usepackage{mathspec}
+  \else
+    \usepackage{unicode-math}
+  \fi
+$else$
+  \usepackage{unicode-math}
+$endif$
+  \defaultfontfeatures{Ligatures=TeX,Scale=MatchLowercase}
+$for(fontfamilies)$
+  \newfontfamily{$fontfamilies.name$}[$fontfamilies.options$]{$fontfamilies.font$}
+$endfor$
+$if(euro)$
+  \newcommand{\euro}{€}
+$endif$
+$if(mainfont)$
+    \setmainfont[$for(mainfontoptions)$$mainfontoptions$$sep$,$endfor$]{$mainfont$}
+$endif$
+$if(sansfont)$
+    \setsansfont[$for(sansfontoptions)$$sansfontoptions$$sep$,$endfor$]{$sansfont$}
+$endif$
+$if(monofont)$
+    \setmonofont[Mapping=tex-ansi$if(monofontoptions)$,$for(monofontoptions)$$monofontoptions$$sep$,$endfor$$endif$]{$monofont$}
+$endif$
+$if(mathfont)$
+$if(mathspec)$
+  \ifxetex
+    \setmathfont(Digits,Latin,Greek)[$for(mathfontoptions)$$mathfontoptions$$sep$,$endfor$]{$mathfont$}
+  \else
+    \setmathfont[$for(mathfontoptions)$$mathfontoptions$$sep$,$endfor$]{$mathfont$}
+  \fi
+$else$
+  \setmathfont[$for(mathfontoptions)$$mathfontoptions$$sep$,$endfor$]{$mathfont$}
+$endif$
+$endif$
+$if(CJKmainfont)$
+    \usepackage{xeCJK}
+    \setCJKmainfont[$for(CJKoptions)$$CJKoptions$$sep$,$endfor$]{$CJKmainfont$}
+$endif$
+\fi
+% use upquote if available, for straight quotes in verbatim environments
+\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
+% use microtype if available
+\IfFileExists{microtype.sty}{%
+\usepackage[$for(microtypeoptions)$$microtypeoptions$$sep$,$endfor$]{microtype}
+\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
+}{}
+\PassOptionsToPackage{hyphens}{url} % url is loaded by hyperref
+$if(verbatim-in-note)$
+\usepackage{fancyvrb}
+$endif$
+% \usepackage[unicode=true]{hyperref} commented because of conflict
+$if(colorlinks)$
+\PassOptionsToPackage{usenames,dvipsnames}{color} % color is loaded by hyperref
+$endif$
+\hypersetup{
+$if(title-meta)$
+            pdftitle={$title-meta$},
+$endif$
+$if(author-meta)$
+            pdfauthor={$author-meta$},
+$endif$
+$if(keywords)$
+            pdfkeywords={$for(keywords)$$keywords$$sep$, $endfor$},
+$endif$
+$if(colorlinks)$
+            colorlinks=true,
+            linkcolor=$if(linkcolor)$$linkcolor$$else$Maroon$endif$,
+            citecolor=$if(citecolor)$$citecolor$$else$Blue$endif$,
+            urlcolor=$if(urlcolor)$$urlcolor$$else$Blue$endif$,
+$else$
+            pdfborder={0 0 0},
+$endif$
+            breaklinks=true}
+\urlstyle{same}  % don't use monospace font for urls
+$if(verbatim-in-note)$
+\VerbatimFootnotes % allows verbatim text in footnotes
+$endif$
+$if(geometry)$
+\usepackage[$for(geometry)$$geometry$$sep$,$endfor$]{geometry}
+$endif$
+$if(lang)$
+\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
+  \usepackage[shorthands=off,$for(babel-otherlangs)$$babel-otherlangs$,$endfor$main=$babel-lang$]{babel}
+$if(babel-newcommands)$
+  $babel-newcommands$
+$endif$
+\else
+  \usepackage{polyglossia}
+  \setmainlanguage[$polyglossia-lang.options$]{$polyglossia-lang.name$}
+$for(polyglossia-otherlangs)$
+  \setotherlanguage[$polyglossia-otherlangs.options$]{$polyglossia-otherlangs.name$}
+$endfor$
+\fi
+$endif$
+$if(natbib)$
+\usepackage{natbib}
+\bibliographystyle{$if(biblio-style)$$biblio-style$$else$plainnat$endif$}
+$endif$
+$if(biblatex)$
+\usepackage[$if(biblio-style)$style=$biblio-style$,$endif$$for(biblatexoptions)$$biblatexoptions$$sep$,$endfor$]{biblatex}
+$for(bibliography)$
+\addbibresource{$bibliography$}
+$endfor$
+$endif$
+$if(listings)$
+\usepackage{listings}
+$endif$
+$if(lhs)$
+\lstnewenvironment{code}{\lstset{language=Haskell,basicstyle=\small\ttfamily}}{}
+$endif$
+$if(highlighting-macros)$
+$highlighting-macros$
+$endif$
+$if(tables)$
+\usepackage{longtable,booktabs}
+% Fix footnotes in tables (requires footnote package)
+\IfFileExists{footnote.sty}{\usepackage{footnote}\makesavenoteenv{long table}}{}
+$endif$
+$if(graphics)$
+\usepackage{graphicx,grffile}
+\makeatletter
+\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi}
+\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi}
+\makeatother
+% Scale images if necessary, so that they will not overflow the page
+% margins by default, and it is still possible to overwrite the defaults
+% using explicit options in \includegraphics[width, height, ...]{}
+\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio}
+$endif$
+$if(links-as-notes)$
+% Make links footnotes instead of hotlinks:
+\renewcommand{\href}[2]{#2\footnote{\url{#1}}}
+$endif$
+$if(strikeout)$
+\usepackage[normalem]{ulem}
+% avoid problems with \sout in headers with hyperref:
+\pdfstringdefDisableCommands{\renewcommand{\sout}{}}
+$endif$
+$if(indent)$
+$else$
+\IfFileExists{parskip.sty}{%
+\usepackage{parskip}
+}{% else
+\setlength{\parindent}{0pt}
+\setlength{\parskip}{6pt plus 2pt minus 1pt}
+}
+$endif$
+\setlength{\emergencystretch}{3em}  % prevent overfull lines
+\providecommand{\tightlist}{%
+  \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
+$if(numbersections)$
+\setcounter{secnumdepth}{$if(secnumdepth)$$secnumdepth$$else$5$endif$}
+$else$
+\setcounter{secnumdepth}{0}
+$endif$
+$if(subparagraph)$
+$else$
+% Redefines (sub)paragraphs to behave more like sections
+\ifx\paragraph\undefined\else
+\let\oldparagraph\paragraph
+\renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}}
+\fi
+\ifx\subparagraph\undefined\else
+\let\oldsubparagraph\subparagraph
+\renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}}
+\fi
+$endif$
+$if(dir)$
+\ifxetex
+  % load bidi as late as possible as it modifies e.g. graphicx
+  $if(latex-dir-rtl)$
+  \usepackage[RTLdocument]{bidi}
+  $else$
+  \usepackage{bidi}
+  $endif$
+\fi
+\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
+  \TeXXeTstate=1
+  \newcommand{\RL}[1]{\beginR #1\endR}
+  \newcommand{\LR}[1]{\beginL #1\endL}
+  \newenvironment{RTL}{\beginR}{\endR}
+  \newenvironment{LTR}{\beginL}{\endL}
+\fi
+$endif$
+
+% set default figure placement to htbp
+\makeatletter
+\def\fps@figure{htbp}
+\makeatother
+
+$for(header-includes)$
+$header-includes$
+$endfor$
+
+$if(title)$
+\title{$title$$if(thanks)$\thanks{$thanks$}$endif$}
+$endif$
+$if(subtitle)$
+\providecommand{\subtitle}[1]{}
+\subtitle{$subtitle$}
+$endif$
+$if(author)$
+\author{$for(author)$$author$$sep$ \and $endfor$}
+$endif$
+$if(institute)$
+\providecommand{\institute}[1]{}
+\institute{$for(institute)$$institute$$sep$ \and $endfor$}
+$endif$
+\date{$date$}
+
+\begin{document}
+$if(title)$
+\maketitle
+$endif$
+$if(abstract)$
+\begin{abstract}
+$abstract$
+\end{abstract}
+$endif$
+
+$for(include-before)$
+$include-before$
+
+$endfor$
+$if(toc)$
+{
+$if(colorlinks)$
+\hypersetup{linkcolor=$if(toccolor)$$toccolor$$else$black$endif$}
+$endif$
+\setcounter{tocdepth}{$toc-depth$}
+\tableofcontents
+}
+$endif$
+$if(lot)$
+\listoftables
+$endif$
+$if(lof)$
+\listoffigures
+$endif$
+$body$
+
+$if(natbib)$
+$if(bibliography)$
+$if(biblio-title)$
+$if(book-class)$
+\renewcommand\bibname{$biblio-title$}
+$else$
+\renewcommand\refname{$biblio-title$}
+$endif$
+$endif$
+\bibliography{$for(bibliography)$$bibliography$$sep$,$endfor$}
+
+$endif$
+$endif$
+$if(biblatex)$
+\printbibliography$if(biblio-title)$[title=$biblio-title$]$endif$
+
+$endif$
+$for(include-after)$
+$include-after$
+
+$endfor$
+\end{document}
diff --git a/presentations/pasc/metadata.yaml b/presentations/pasc/metadata.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..acc55e66eb58fa27b2a40ea918bb644a28f97635
--- /dev/null
+++ b/presentations/pasc/metadata.yaml
@@ -0,0 +1,7 @@
+---
+# used for lecture slides and homework sheets
+subtitle: "Séminaire de groupe du SPC"
+author: "Orestis Malaspinas, ITI, HEPIA"
+# lang: fr-CH
+...
+
diff --git a/presentations/pasc/my_highlight.theme b/presentations/pasc/my_highlight.theme
new file mode 100644
index 0000000000000000000000000000000000000000..1d80b47b144ca30f252d27a32e3d3775267c7bbb
--- /dev/null
+++ b/presentations/pasc/my_highlight.theme
@@ -0,0 +1,204 @@
+{
+    "text-color": null,
+    "background-color": "#f0f0f0",
+    "line-number-color": "#aaaaaa",
+    "line-number-background-color": null,
+    "text-styles": {
+        "Other": {
+            "text-color": "#8f5902",
+            "background-color": null,
+            "bold": false,
+            "italic": false,
+            "underline": false
+        },
+        "Attribute": {
+            "text-color": "#c4a000",
+            "background-color": null,
+            "bold": false,
+            "italic": false,
+            "underline": false
+        },
+        "SpecialString": {
+            "text-color": "#4e9a06",
+            "background-color": null,
+            "bold": false,
+            "italic": false,
+            "underline": false
+        },
+        "Annotation": {
+            "text-color": "#8f5902",
+            "background-color": null,
+            "bold": true,
+            "italic": true,
+            "underline": false
+        },
+        "Function": {
+            "text-color": "#000000",
+            "background-color": null,
+            "bold": false,
+            "italic": false,
+            "underline": false
+        },
+        "String": {
+            "text-color": "#4e9a06",
+            "background-color": null,
+            "bold": false,
+            "italic": false,
+            "underline": false
+        },
+        "ControlFlow": {
+            "text-color": "#204a87",
+            "background-color": null,
+            "bold": true,
+            "italic": false,
+            "underline": false
+        },
+        "Operator": {
+            "text-color": "#ce5c00",
+            "background-color": null,
+            "bold": true,
+            "italic": false,
+            "underline": false
+        },
+        "Error": {
+            "text-color": "#a40000",
+            "background-color": null,
+            "bold": true,
+            "italic": false,
+            "underline": false
+        },
+        "BaseN": {
+            "text-color": "#0000cf",
+            "background-color": null,
+            "bold": false,
+            "italic": false,
+            "underline": false
+        },
+        "Alert": {
+            "text-color": "#ef2929",
+            "background-color": null,
+            "bold": false,
+            "italic": false,
+            "underline": false
+        },
+        "Variable": {
+            "text-color": "#000000",
+            "background-color": null,
+            "bold": false,
+            "italic": false,
+            "underline": false
+        },
+        "Extension": {
+            "text-color": null,
+            "background-color": null,
+            "bold": false,
+            "italic": false,
+            "underline": false
+        },
+        "Preprocessor": {
+            "text-color": "#8f5902",
+            "background-color": null,
+            "bold": false,
+            "italic": true,
+            "underline": false
+        },
+        "Information": {
+            "text-color": "#8f5902",
+            "background-color": null,
+            "bold": true,
+            "italic": true,
+            "underline": false
+        },
+        "VerbatimString": {
+            "text-color": "#4e9a06",
+            "background-color": null,
+            "bold": false,
+            "italic": false,
+            "underline": false
+        },
+        "Warning": {
+            "text-color": "#8f5902",
+            "background-color": null,
+            "bold": true,
+            "italic": true,
+            "underline": false
+        },
+        "Documentation": {
+            "text-color": "#8f5902",
+            "background-color": null,
+            "bold": true,
+            "italic": true,
+            "underline": false
+        },
+        "Import": {
+            "text-color": null,
+            "background-color": null,
+            "bold": false,
+            "italic": false,
+            "underline": false
+        },
+        "Char": {
+            "text-color": "#4e9a06",
+            "background-color": null,
+            "bold": false,
+            "italic": false,
+            "underline": false
+        },
+        "DataType": {
+            "text-color": "#204a87",
+            "background-color": null,
+            "bold": false,
+            "italic": false,
+            "underline": false
+        },
+        "Float": {
+            "text-color": "#0000cf",
+            "background-color": null,
+            "bold": false,
+            "italic": false,
+            "underline": false
+        },
+        "Comment": {
+            "text-color": "#8f5902",
+            "background-color": null,
+            "bold": false,
+            "italic": true,
+            "underline": false
+        },
+        "CommentVar": {
+            "text-color": "#8f5902",
+            "background-color": null,
+            "bold": true,
+            "italic": true,
+            "underline": false
+        },
+        "Constant": {
+            "text-color": "#000000",
+            "background-color": null,
+            "bold": false,
+            "italic": false,
+            "underline": false
+        },
+        "SpecialChar": {
+            "text-color": "#000000",
+            "background-color": null,
+            "bold": false,
+            "italic": false,
+            "underline": false
+        },
+        "DecVal": {
+            "text-color": "#0000cf",
+            "background-color": null,
+            "bold": false,
+            "italic": false,
+            "underline": false
+        },
+        "Keyword": {
+            "text-color": "#204a87",
+            "background-color": null,
+            "bold": true,
+            "italic": false,
+            "underline": false
+        }
+    }
+}
diff --git a/presentations/pasc/pres.md b/presentations/pasc/pres.md
new file mode 100644
index 0000000000000000000000000000000000000000..64aaa6e4cafc9a6665990ebf4bff71bca92f4ed9
--- /dev/null
+++ b/presentations/pasc/pres.md
@@ -0,0 +1,136 @@
+% Ludwig Boltzmann and the Raiders of the Futhark
+% O. Malaspinas
+% 15 novembre 2019
+
+# What is Futhark
+
+* Statically typed, data-parallel, purely functional, array language ...
+* with limited functionalities (no I/O) ...
+* that compiles to C, OpenCL, and Cuda backends ...
+* efficiently ...
+* without the pain of actually writing GPU code.
+
+Spoiler: a toy d3q27 recursive regularized model does 1.5 GLUPS (single precision).
+
+# Why use Futhark?
+
+<!-- TODO: add opencl example -->
+
+# How to use Futhark
+
+* Not intended to replace existing generic-purpose languages.
+* But aims at being easily integrated into non Futhark code:
+
+    * Used into python code: `PyOpenCL`,
+    * Conventional `C` code,
+    * Several others (`C#`, `Haskell`, `F#`, ..., and soon `Rust`).
+
+* Futhark produces `C` code so it's accessible from any language through a FFI.
+
+# An example: the dot product
+
+## `dotprod.fut`
+
+```
+entry dotprod (xs: []i32) (ys: []i32): i32 =
+  reduce (+) 0 (map2 (*) xs ys)
+```
+
+Futhark is very efficient at parallelizing array operations.
+
+# An example: the dot product from `C`
+
+```C
+#include <stdio.h>
+#include "dotprod.h"
+int main() {
+  int x[] = { 1, 2, 3, 4 };
+  int y[] = { 2, 3, 4, 1 };
+  // here goes context init
+  futhark_i32_1d *x_arr = futhark_new_i32_1d(ctx, x, 4);
+  futhark_i32_1d *y_arr = futhark_new_i32_1d(ctx, y, 4);
+
+  int res;
+  futhark_entry_dotprod(ctx, &res, x_arr, y_arr);
+  futhark_context_sync(ctx);
+
+  printf("Result: %d\n", res);
+  // here goes context destruction and
+  // memory deallocation
+}
+```
+
+# And now
+
+Let's implement some lattice Boltzmann
+
+# Computation of macroscopic moments
+
+## LBM equation
+
+\begin{equation}
+\rho=\sum_{i=0}^{q-1}f_i,\quad \rho\bm{u}=\sum_{i=0}^{q-1}f_i \bm{c}_i, \forall\ \bm{x}.
+\end{equation}
+
+## Futhark code
+
+```
+let compute_rho (f: [nx][ny][nz][27]f32): [nx][ny][nz]f32 = 
+    map ( map ( map ( reduce (+) 0.0 ) ) ) f
+```
+
+<!-- TODO add j -->
+
+# Computation of the equilibrium distribution
+
+## LBM equation
+
+\begin{equation}
+f_i^\mathrm{eq}=w_i\rho\left(1+\frac{\bm{c}_i\cdot \bm{u}}{c_s^2}+\frac{1}{2c_s^4}(\bm{c}_i\cdot \bm{u})^2-\frac{1}{2c_s^2}\bm{u}^2\right),\ \forall \bm{x},i
+\end{equation}
+
+## Futhark code
+
+<!-- TODO add feq -->
+
+# Collision
+
+## LBM equation
+
+\begin{equation}
+f^\mathrm{out}_i=f_i\left(1-\frac{1}{\tau}\right)+\frac{1}{\tau}f_i^\mathrm{eq}.
+\end{equation}
+
+## Futhark code
+
+<!-- TODO add collision -->
+
+# Propagation
+
+## LBM equation
+
+\begin{equation}
+f_i(\bm{x}+\bm{c}_i,t+1)=f^\mathrm{out}_i(\bm{x},t).
+\end{equation}
+
+## Futhark code
+
+<!-- TODO add collision -->
+
+# And now everything together
+
+1. Collision (completely local).
+    * Compute $\rho$,
+    * Compute $\bm{u}$,
+    * Compute $f_i^\mathrm{eq}$,
+    * Compute $f_i^\mathrm{out}$.
+2. Propagation.
+
+## Futhark code
+
+<!-- TODO add collide and stream -->
+
+
+
+
+