diff --git a/presentations/pasc/Makefile b/presentations/pasc/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..575514e294ff02d83fbf01015b7fde56b0302d1d --- /dev/null +++ b/presentations/pasc/Makefile @@ -0,0 +1,24 @@ +DATADIR = ./ +FILTERDIR = $(DATADIR)/filters +RESOURCEDIR = $(DATADIR)/resources + +PDFOPTIONS = -t beamer +PDFOPTIONS += --highlight-style my_highlight.theme +PDFOPTIONS += --pdf-engine pdflatex +PDFOPTIONS += --template=./default.latex +PDFOPTIONS += -V theme:metropolis +PDFOPTIONS += -V themeoptions:numbering=none -V themeoptions:progressbar=foot +PDFOPTIONS += -V fontsize=smaller +# PDFOPTIONS += --filter pandoc-beamer-block +# PDFOPTIONS += --lua-filter=${FILTERDIR}/tex.lua +# PDFOPTIONS += --include-in-header=${RESOURCEDIR}/definitions.tex +# PDFOPTIONS += --include-in-header=${RESOURCEDIR}/beamer.tex +PDFOPTIONS += $(OPTIONS) + +all: pres.pdf + +pres.pdf: pres.md metadata.yaml + pandoc $(PDFOPTIONS) -o $@ $^ + +clean: + rm -f *.pdf diff --git a/presentations/pasc/code/.gitignore b/presentations/pasc/code/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..fc0cf783b825875336cec320dfadbdb0fc051c40 --- /dev/null +++ b/presentations/pasc/code/.gitignore @@ -0,0 +1,5 @@ +*.o +lbm +liblbm.h +liblbm +liblbm.c diff --git a/presentations/pasc/code/Makefile b/presentations/pasc/code/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..cc6e33264955f87c888fefc6cc0938a5b46a51a5 --- /dev/null +++ b/presentations/pasc/code/Makefile @@ -0,0 +1,75 @@ +TARGET=c + +ifeq ($(TARGET),cuda) + CUDALIBS=-lcudart -lcuda -lnvrtc +else ifeq ($(TARGET), opencl) + OPENCLIBS=-lOpenCL +endif + +CC=gcc +FC=futhark +OPTS=-Wall -Wextra -g -std=gnu11 +SANITIZERS=-fsanitize=address +OPTIM=-O3 +LIBS=-lm $(SDLLIBS) $(CUDALIBS) $(OPENCLIBS) $(SANITIZERS) + +all: lbm3d + +raider_lbm3d: lbm3d.o raider_lbm.o scalar_field.o tensor_field.o scalar_field_2d.o tensor_field_2d.o lbm_utils.o vtk.o point.o box.o units.o utils.o + $(CC) $(OPTS) $(OPTIM) -o $@ $^ $(LIBS) + + +lbm3d: lbm3d.o liblbm.o scalar_field.o tensor_field.o scalar_field_2d.o tensor_field_2d.o lbm_utils.o vtk.o point.o box.o units.o utils.o + $(CC) $(OPTS) $(OPTIM) -o $@ $^ $(LIBS) + +lbm3d.o: lbm3d.c liblbm.c liblbm.h c_interface/*.h + $(CC) $(OPTIM) $(OPTS) $(SANITIZERS) -c $< + +liblbm.o: liblbm.c liblbm.h + $(CC) $(OPTIM) $(OPTS) -c $< + +liblbm.c: liblbm.fut + $(FC) $(TARGET) --library $< + +raider_lbm.o: ../presentation/raider_lbm.c ../presentation/raider_lbm.h + $(CC) $(OPTIM) $(OPTS) -c $< + +../presentation/raider_lbm.c: ../presentation/raider_lbm.fut + $(FC) $(TARGET) --library $< + +tensor_field.o: c_interface/tensor_field.c c_interface/tensor_field.h c_interface/scalar_field.h + $(CC) $(OPTIM) $(OPTS) $(SANITIZERS) -c $< + +scalar_field.o: c_interface/scalar_field.c c_interface/scalar_field.h + $(CC) $(OPTIM) $(OPTS) $(SANITIZERS) -c $< + +tensor_field_2d.o: c_interface/tensor_field_2d.c c_interface/tensor_field_2d.h c_interface/scalar_field_2d.h + $(CC) $(OPTIM) $(OPTS) $(SANITIZERS) -c $< + +scalar_field_2d.o: c_interface/scalar_field_2d.c c_interface/scalar_field_2d.h + $(CC) $(OPTIM) $(OPTS) $(SANITIZERS) -c $< + +lbm_utils.o: c_interface/lbm_utils.c c_interface/lbm_utils.h c_interface/globals.h c_interface/scalar_field.h c_interface/tensor_field.h + $(CC) $(OPTIM) $(OPTS) $(SANITIZERS) -c $< + +vtk.o: c_interface/vtk.c c_interface/vtk.h c_interface/box.h c_interface/point.h c_interface/scalar_field.h c_interface/tensor_field.h c_interface/globals.h + $(CC) $(OPTIM) $(OPTS) $(SANITIZERS) -c $< + +point.o: c_interface/point.c c_interface/point.h c_interface/globals.h + $(CC) $(OPTIM) $(OPTS) $(SANITIZERS) -c $< + +box.o: c_interface/box.c c_interface/box.h c_interface/globals.h + $(CC) $(OPTIM) $(OPTS) $(SANITIZERS) -c $< + +utils.o: c_interface/utils.c c_interface/utils.h c_interface/globals.h + $(CC) $(OPTIM) $(OPTS) $(SANITIZERS) -c $< + +units.o: c_interface/units.c c_interface/units.h c_interface/utils.h c_interface/globals.h + $(CC) $(OPTIM) $(OPTS) $(SANITIZERS) -c $< + + +clean: + rm -f liblbm.h liblbm.c liblbm.o liblbm + rm -f *.o + rm -f lbm3d + diff --git a/presentations/pasc/code/c_interface/box.c b/presentations/pasc/code/c_interface/box.c new file mode 100644 index 0000000000000000000000000000000000000000..81e98b57f17e0e205ede6077a2d142f8daab6afd --- /dev/null +++ b/presentations/pasc/code/c_interface/box.c @@ -0,0 +1,12 @@ +#include "box.h" + +box box_create(fint x0, fint x1, fint y0, fint y1, fint z0, fint z1) { + box b; + b.x0 = x0; + b.x1 = x1; + b.y0 = y0; + b.y1 = y1; + b.z0 = z0; + b.z1 = z1; + return b; +} \ No newline at end of file diff --git a/presentations/pasc/code/c_interface/box.h b/presentations/pasc/code/c_interface/box.h new file mode 100644 index 0000000000000000000000000000000000000000..7ad18b1d9f83fb829a801c4862791cf7c515f285 --- /dev/null +++ b/presentations/pasc/code/c_interface/box.h @@ -0,0 +1,12 @@ +#ifndef BOX_H +#define BOX_H + +#include "globals.h" + +typedef struct { + fint x0, x1, y0, y1, z0, z1; +} box; + +box box_create(fint x0, fint x1, fint y0, fint y1, fint z0, fint z1); + +#endif diff --git a/presentations/pasc/code/c_interface/globals.h b/presentations/pasc/code/c_interface/globals.h new file mode 100644 index 0000000000000000000000000000000000000000..1c8ea6c4c1fdfdf9cc7f62b97afa40df46733d98 --- /dev/null +++ b/presentations/pasc/code/c_interface/globals.h @@ -0,0 +1,18 @@ +#ifndef GLOBALS_H +#define GLOBALS_H + +#include <stdint.h> +#include <math.h> + +typedef int32_t fint; +typedef uint32_t ufint; + +#ifdef DBL +typedef double T; +#else +typedef float T; +#endif + +static const T pi = (T)4*atan((T)1); + +#endif \ No newline at end of file diff --git a/presentations/pasc/code/c_interface/includes.h b/presentations/pasc/code/c_interface/includes.h new file mode 100644 index 0000000000000000000000000000000000000000..b8dde1935014e31d48c68c96173c4988964cca95 --- /dev/null +++ b/presentations/pasc/code/c_interface/includes.h @@ -0,0 +1,16 @@ +#ifndef INCLUDES_H +#define INCLUDES_H + +#include "globals.h" +#include "scalar_field_2d.h" +#include "scalar_field.h" +#include "tensor_field_2d.h" +#include "tensor_field.h" +#include "lbm_utils.h" +#include "point.h" +#include "box.h" +#include "vtk.h" +#include "units.h" +#include "utils.h" + +#endif \ No newline at end of file diff --git a/presentations/pasc/code/c_interface/lbm_utils.c b/presentations/pasc/code/c_interface/lbm_utils.c new file mode 100644 index 0000000000000000000000000000000000000000..3cc3966ec94adaa1e509b636b2220743799c7291 --- /dev/null +++ b/presentations/pasc/code/c_interface/lbm_utils.c @@ -0,0 +1,157 @@ +#include "lbm_utils.h" +#include <math.h> +#include <assert.h> +#include <stdlib.h> +#include <time.h> + +static T compute_feq(T w, const int c[], T rho, T u[]) { + T c_u = c[0] * u[0]; + T u_sqr = u[0] * u[0]; + for (int id = 1; id < D3; ++id) { + c_u += c[id] * u[id]; + u_sqr += u[id] * u[id]; + } + return w * rho * (1.0 + 3.0 * c_u + 4.5 * c_u * c_u - 1.5 * u_sqr); +} + +void lbm_allocate_and_ini_at_equilibrium_inplace(T rho, T u[], int nx, int ny, int nz, int q, tensor_field field) { + assert(nx == field.nx && "Inconsistent nx size."); + assert(ny == field.ny && "Inconsistent ny size."); + assert(nz == field.nz && "Inconsistent nz size."); + + for (int ix = 0; ix < nx; ++ix) { + for (int iy = 0; iy < ny; ++iy) { + for (int iz = 0; iz < nz; ++iz) { + for (int ipop = 0; ipop < q; ++ipop) { + field.grid[ix][iy][iz][ipop] = compute_feq(w[ipop], c_t[ipop], rho, u); + } + } + } + } +} + +void lbm_allocate_and_ini_at_equilibrium_kida_inplace(T rho0, T u_max, int nx, int ny, int nz, int q, tensor_field field) { + assert(nx == field.nx && "Inconsistent nx size."); + assert(ny == field.ny && "Inconsistent ny size."); + assert(nz == field.nz && "Inconsistent nz size."); + + srand(time(NULL)); + + for (int ix = 0; ix < nx; ++ix) { + for (int iy = 0; iy < ny; ++iy) { + for (int iz = 0; iz < nz; ++iz) { + T u[3]; + + T x = (T)2*pi*(T)ix / (T)(nx); + T y = (T)2*pi*(T)iy / (T)(ny); + T z = (T)2*pi*(T)iz / (T)(nz); + + u[0] = sin(x)*(cos((T)3*y) * cos(z) - cos(y)*cos((T)3*z)); + u[1] = sin(y)*(cos((T)3*z) * cos(x) - cos(z)*cos((T)3*x)); + u[2] = sin(z)*(cos((T)3*x) * cos(y) - cos(x)*cos((T)3*y)); + + for (fint id = 0; id < 3; ++id) { + T rand_u = ((T)rand())/RAND_MAX; + + u[id] += 0.01 * rand_u; + u[id] *= u_max; + } + + T rho = -(1.0/6.0)*cos(4*z)*cos(2*x)*cos(2*y)+(1.0/36.0)*cos(4*z)*cos(4*x)*cos(2*y)+(1.0/36.0)*cos(4*z)*cos(2*x)*cos(4*y); + rho += +cos(2*z)*cos(2*x)*cos(2*y)-(1.0/6.0)*cos(2*z)*cos(4*x)*cos(2*y)-(1.0/6.0)*cos(2*z)*cos(2*x)*cos(4*y); + rho += +(1.0/36.0)*cos(4*x)*cos(4*y)*cos(2*z)-(1.0/80.0)*cos(6*y)*cos(2*z)-(1.0/10.0)*cos(4*z)*cos(2*y)+(1.0/8.0)*cos(4*z)*cos(4*y); + rho += -(1.0/10.0)*cos(4*z)*cos(2*x)+(1.0/8.0)*cos(4*z)*cos(4*x)+(1.0/8.0)*cos(2*z)*cos(2*y)-(1.0/10.0)*cos(2*z)*cos(4*y); + rho += +(1.0/8.0)*cos(2*z)*cos(2*x)-(1.0/10.0)*cos(2*z)*cos(4*x)-(1.0/80.0)*cos(6*x)*cos(2*z)-(1.0/4.0)*cos(2*x); + rho += -(1.0/80.0)*cos(6*z)*cos(2*x)-(1.0/80.0)*cos(6*z)*cos(2*y)-(1.0/80.0)*cos(6*x)*cos(2*y)-(1.0/10.0)*cos(2*x)*cos(4*y); + rho += +(1.0/8.0)*cos(2*x)*cos(2*y)+(1.0/8.0)*cos(4*x)*cos(4*y)-(1.0/80.0)*cos(2*x)*cos(6*y)-(1.0/10.0)*cos(4*x)*cos(2*y); + rho += -(1.0/4.0)*cos(2*z)-(1.0/4.0)*cos(2*y); + + rho *= -u_max * u_max; + rho *= (T)3.0; + rho += rho0; + + for (int ipop = 0; ipop < q; ++ipop) { + field.grid[ix][iy][iz][ipop] = compute_feq(w[ipop], c_t[ipop], rho, u); + } + } + } + } +} + +struct futhark_f32_4d *lbm_collide_and_stream(struct futhark_context *ctx, struct futhark_f32_4d *f, T omega, fint iter) { + struct futhark_f32_4d *fout; + futhark_entry_main(ctx, &fout, f, omega, iter); + futhark_free_f32_4d(ctx, f); + + return fout; +} + +void lbm_compute_density_inplace(tensor_field lattice, scalar_field density) { + assert(tensor_field_consistant_size_scalar_field(lattice, density) && "Sizes of rho and lattice are inconsistent."); + + for (int ix = 0; ix < lattice.nx; ++ix) { + for (int iy = 0; iy < lattice.ny; ++iy) { + for (int iz = 0; iz < lattice.nz; ++iz) { + T rho = 0.0; + for (int ipop = 0; ipop < lattice.q; ++ipop) { + rho += lattice.grid[ix][iy][iz][ipop]; + } + density.grid[ix][iy][iz] = rho; + } + } + } +} + +void lbm_compute_velocity_inplace(tensor_field lattice, tensor_field velocity) { + assert(tensor_field_consistant_spatial_size_tensor_field(lattice, velocity) && "Sizes of velocity and lattice are inconsistent."); + + for (int ix = 0; ix < lattice.nx; ++ix) { + for (int iy = 0; iy < lattice.ny; ++iy) { + for (int iz = 0; iz < lattice.nz; ++iz) { + T rho = 0.0; + for (int ipop = 0; ipop < lattice.q; ++ipop) { + rho += lattice.grid[ix][iy][iz][ipop]; + } + for (int id = 0; id < lattice.dim; ++id) { + velocity.grid[ix][iy][iz][id] = (T)0; + for (int ipop = 0; ipop < lattice.q; ++ipop) { + velocity.grid[ix][iy][iz][id] += lattice.grid[ix][iy][iz][ipop] * c[id][ipop]; + } + velocity.grid[ix][iy][iz][id] /= rho; + } + } + } + } +} + +tensor_field lbm_allocate_and_ini_at_equilibrium(T rho, T u[], int nx, int ny, int nz, int q) { + tensor_field lattice = tensor_field_allocate(nx, ny, nz, q); + + lbm_allocate_and_ini_at_equilibrium_inplace(rho, u, nx, ny, nz, q, lattice); + + return lattice; +} + +tensor_field lbm_allocate_and_ini_at_equilibrium_kida(T rho0, T u_max, int nx, int ny, int nz, int q) { + tensor_field lattice = tensor_field_allocate(nx, ny, nz, q); + + lbm_allocate_and_ini_at_equilibrium_kida_inplace(rho0, u_max, nx, ny, nz, q, lattice); + + return lattice; +} + +scalar_field lbm_compute_density(tensor_field lattice) { + scalar_field density = scalar_field_allocate(lattice.nx, lattice.ny, lattice.nz); + + lbm_compute_density_inplace(lattice, density); + + return density; +} + +tensor_field lbm_compute_velocity(tensor_field lattice) { + tensor_field vel = tensor_field_allocate(lattice.nx, lattice.ny, lattice.nz, lattice.dim); + + lbm_compute_velocity_inplace(lattice, vel); + + return vel; +} diff --git a/presentations/pasc/code/c_interface/lbm_utils.h b/presentations/pasc/code/c_interface/lbm_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..4a6487dcd5ba4a9f793d6c4a82ce726ebadc082f --- /dev/null +++ b/presentations/pasc/code/c_interface/lbm_utils.h @@ -0,0 +1,63 @@ +#ifndef LBM_UTILS_H +#define LBM_UTILS_H + +#include "../liblbm.h" +#include "globals.h" +#include "scalar_field.h" +#include "tensor_field.h" + +#define Q27 27 +#define D3 3 + +static const T cs2 = 1.0 / 3.0; + +static const T w[Q27] = { + 8.0/27, + 2.0/27, 2.0/27, 2.0/27, + 1.0/54, 1.0/54, 1.0/54, + 1.0/54, 1.0/54, 1.0/54, + 1.0/216, 1.0/216, 1.0/216, 1.0/216, + 2.0/27, 2.0/27, 2.0/27, + 1.0/54, 1.0/54, 1.0/54, + 1.0/54, 1.0/54, 1.0/54, + 1.0/216, 1.0/216, 1.0/216, 1.0/216 + }; + +static const fint c_t[Q27][D3] = { + { 0, 0, 0}, + + {-1, 0, 0}, { 0,-1, 0}, { 0, 0,-1}, + {-1,-1, 0}, {-1, 1, 0}, {-1, 0,-1}, + {-1, 0, 1}, { 0,-1,-1}, { 0,-1, 1}, + {-1,-1,-1}, {-1,-1, 1}, {-1, 1,-1}, {-1, 1, 1}, + + { 1, 0, 0}, { 0, 1, 0}, { 0, 0, 1}, + { 1, 1, 0}, { 1,-1, 0}, { 1, 0, 1}, + { 1, 0,-1}, { 0, 1, 1}, { 0, 1,-1}, + { 1, 1, 1}, { 1, 1,-1}, { 1,-1, 1}, { 1,-1,-1} }; + +static const fint c[D3][Q27] = { + { 0, -1, 0, 0, -1, -1, -1, -1, 0, 0, -1, -1, -1, -1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1 }, + { 0, 0, -1, 0, -1, 1, 0, 0, -1, -1, -1, -1, 1, 1, 0, 1, 0, 1, -1, 0, 0, 1, 1, 1, 1, -1, -1 }, + { 0, 0, 0, -1, 0, 0, -1, 1, -1, 1, -1, 1, -1, 1, 0, 0, 1, 0, 0, 1, -1, 1, -1, 1, -1, 1, -1 }, +}; + +void lbm_allocate_and_ini_at_equilibrium_inplace(T rho, T u[], int nx, int ny, int nz, int q, tensor_field field); + +void lbm_allocate_and_ini_at_equilibrium_kida_inplace(T rho0, T u_max, int nx, int ny, int nz, int q, tensor_field field); + +struct futhark_f32_4d *lbm_collide_and_stream(struct futhark_context *ctx, struct futhark_f32_4d *f, T omega, fint iter); + +void lbm_compute_density_inplace(tensor_field lattice, scalar_field rho); + +void lbm_compute_velocity_inplace(tensor_field lattice, tensor_field velocity); + +tensor_field lbm_allocate_and_ini_at_equilibrium(T rho, T u[], int nx, int ny, int nz, int q); + +tensor_field lbm_allocate_and_ini_at_equilibrium_kida(T rho0, T u_max, int nx, int ny, int nz, int q); + +scalar_field lbm_compute_density(tensor_field lattice); + +tensor_field lbm_compute_velocity(tensor_field lattice); + +#endif \ No newline at end of file diff --git a/presentations/pasc/code/c_interface/point.c b/presentations/pasc/code/c_interface/point.c new file mode 100644 index 0000000000000000000000000000000000000000..902d69ccefea37cfe4facbaf00728b9fe2853cb7 --- /dev/null +++ b/presentations/pasc/code/c_interface/point.c @@ -0,0 +1,14 @@ +#include "point.h" + +point point_create(T x, T y, T z) { + point p; + p.x = x; + p.y = y; + p.z = z; + + return p; +} + +point point_create_default() { + return point_create((T)0, (T)0, (T)0); +} \ No newline at end of file diff --git a/presentations/pasc/code/c_interface/point.h b/presentations/pasc/code/c_interface/point.h new file mode 100644 index 0000000000000000000000000000000000000000..df80156c7e23988916ec9a9a6d8c094d237d8fb0 --- /dev/null +++ b/presentations/pasc/code/c_interface/point.h @@ -0,0 +1,13 @@ +#ifndef POINT_H +#define POINT_H + +#include "globals.h" + +typedef struct { + T x, y, z; +} point; + +point point_create(T x, T y, T z); +point point_create_default(); + +#endif diff --git a/presentations/pasc/code/c_interface/scalar_field.c b/presentations/pasc/code/c_interface/scalar_field.c new file mode 100644 index 0000000000000000000000000000000000000000..339bbfaf16d68e818fb6c29f78a4dae4db16669d --- /dev/null +++ b/presentations/pasc/code/c_interface/scalar_field.c @@ -0,0 +1,134 @@ +#include "scalar_field.h" +#include "scalar_field_2d.h" +#include <stdlib.h> +#include <string.h> +#include <float.h> +#include <stdio.h> +#include <assert.h> + +bool scalar_field_consistant_size_scalar_field(scalar_field rhs, scalar_field lhs) { + return (rhs.nx == lhs.nx && rhs.ny == lhs.ny && rhs.nz == lhs.nz); +} + +scalar_field scalar_field_allocate(fint nx, fint ny, fint nz) { + scalar_field field; + field.nx = nx; + field.ny = ny; + field.nz = nz; + + field.data = malloc(sizeof(T) * nx * ny * nz); + field.grid = malloc(sizeof(T **) * nx); + for (fint ix = 0; ix < nx; ++ix) { + field.grid[ix] = malloc(sizeof(T *) * ny); + for (fint iy = 0; iy < ny; ++iy) { + field.grid[ix][iy] = field.data + nz * (iy + ny * ix); + } + } + return field; +} + +void scalar_field_deallocate(scalar_field field) { + free(field.data); + for (fint ix = 0; ix < field.nx; ++ix) { + free(field.grid[ix]); + } + free(field.grid); +} + +T scalar_field_compute_max(scalar_field field) { +#ifdef DBL + T max = DBL_MIN; +#else + T max = FLT_MIN; +#endif + for (fint ix = 0; ix < field.nx; ++ix) { + for (fint iy = 0; iy < field.ny; ++iy){ + for (fint iz = 0; iz < field.nz; ++iz){ + if (field.grid[ix][iy][iz] > max) { + max = field.grid[ix][iy][iz]; + } + } + } + } + return max; +} + +T scalar_field_compute_min(scalar_field field) { +#ifdef DBL + T min = DBL_MAX; +#else + T min = FLT_MAX; +#endif + for(int ix = 0; ix < field.nx; ++ix) { + for(int iy = 0; iy < field.ny; ++iy){ + for (fint iz = 0; iz < field.nz; ++iz){ + if (field.grid[ix][iy][iz] < min) { + min = field.grid[ix][iy][iz]; + } + } + } + } + return min; +} + +scalar_field_2d scalar_field_extract_2d_x(scalar_field field, int ix) { + scalar_field_2d field_2d = scalar_field_2d_allocate(field.ny, field.nz); + + for (fint iy = 0; iy < field.ny; ++iy) { + for (fint iz = 0; iz < field.nz; ++iz) { + field_2d.grid[ix][iy] = field.grid[ix][iy][iz]; + } + } + return field_2d; +} + +scalar_field_2d scalar_field_extract_2d_y(scalar_field field, int iy) { + scalar_field_2d field_2d = scalar_field_2d_allocate(field.nx, field.nz); + + for (fint ix = 0; ix < field.nx; ++ix) { + for (fint iz = 0; iz < field.nz; ++iz) { + field_2d.grid[ix][iy] = field.grid[ix][iy][iz]; + } + } + return field_2d; +} + +scalar_field_2d scalar_field_extract_2d_z(scalar_field field, int iz) { + scalar_field_2d field_2d = scalar_field_2d_allocate(field.nx, field.ny); + + for (fint ix = 0; ix < field.nx; ++ix) { + for (fint iy = 0; iy < field.ny; ++iy) { + field_2d.grid[ix][iy] = field.grid[ix][iy][iz]; + } + } + return field_2d; +} + +/** + * Ecriture du tableau des vitesses dans un fichier + **/ +void scalar_field_write_file(scalar_field field, char *dir, char *prefix, int i) { + // Create filename + char integer_string[128]; + sprintf(integer_string, "%s%s%d", dir, prefix, i); + assert(strlen(integer_string) < 128); + char *filename = strcat(integer_string, ".dat"); + + // Open file + FILE *fp; + if ((fp = fopen(filename, "w")) == NULL) { + printf("Error ! opening file"); + exit(EXIT_FAILURE); + } + + // Fill file + for (fint x = 0; x < field.nx; x++) { + for (fint y = 0; y < field.ny; y++) { + for (fint z = 0; z < field.nz; z++) { + fprintf(fp, "%f ", field.grid[x][y][z]); + } + } + } + + fclose(fp); +} diff --git a/presentations/pasc/code/c_interface/scalar_field.h b/presentations/pasc/code/c_interface/scalar_field.h new file mode 100644 index 0000000000000000000000000000000000000000..813847778485aba9535a81a0ffa56907a85b6a0a --- /dev/null +++ b/presentations/pasc/code/c_interface/scalar_field.h @@ -0,0 +1,32 @@ +#ifndef SCALAR_FIELD_H +#define SCALAR_FIELD_H + +#include <stdbool.h> +#include "globals.h" +#include "scalar_field_2d.h" + +typedef struct { + T *data; + T ***grid; + fint nx, ny, nz; +} scalar_field; + +bool scalar_field_consistant_size_scalar_field(scalar_field rhs, scalar_field lhs); + +scalar_field scalar_field_allocate(fint nx, fint ny, fint nz); + +void scalar_field_deallocate(scalar_field field); + +T scalar_field_compute_min(scalar_field field); + +T scalar_field_compute_max(scalar_field field); + +void scalar_field_write_file(scalar_field field, char *dir, char *prefix, int i); + +scalar_field_2d scalar_field_extract_2d_x(scalar_field field, int ix); + +scalar_field_2d scalar_field_extract_2d_y(scalar_field field, int iy); + +scalar_field_2d scalar_field_extract_2d_z(scalar_field field, int iz); + +#endif \ No newline at end of file diff --git a/presentations/pasc/code/c_interface/scalar_field_2d.c b/presentations/pasc/code/c_interface/scalar_field_2d.c new file mode 100644 index 0000000000000000000000000000000000000000..be51f050c84d8adf5c41addf0d22c2f3ffeee6d3 --- /dev/null +++ b/presentations/pasc/code/c_interface/scalar_field_2d.c @@ -0,0 +1,83 @@ +#include "scalar_field_2d.h" +#include <stdlib.h> +#include <string.h> +#include <float.h> +#include <stdio.h> +#include <assert.h> + +scalar_field_2d scalar_field_2d_allocate(fint nx, fint ny) { + scalar_field_2d field; + field.nx = nx; + field.ny = ny; + + field.data = malloc(sizeof(T) * nx * ny); + field.grid = malloc(sizeof(T *) * nx); + for (fint ix = 0; ix < nx; ++ix) { + field.grid[ix] = field.data + ny * ix; + } + return field; +} + +void scalar_field_2d_deallocate(scalar_field_2d field) { + free(field.data); + free(field.grid); +} + +T scalar_field_2d_compute_max(scalar_field_2d field) { +#ifdef DBL + T max = DBL_MIN; +#else + T max = FLT_MIN; +#endif + for (fint ix = 0; ix < field.nx; ++ix) { + for (fint iy = 0; iy < field.ny; ++iy){ + if (field.grid[ix][iy] > max) { + max = field.grid[ix][iy]; + } + } + } + return max; +} + +T scalar_field_2d_compute_min(scalar_field_2d field) { +#ifdef DBL + T min = DBL_MAX; +#else + T min = FLT_MAX; +#endif + for(int ix = 0; ix < field.nx; ++ix) { + for(int iy = 0; iy < field.ny; ++iy){ + if (field.grid[ix][iy] < min) { + min = field.grid[ix][iy]; + } + } + } + return min; +} + +/** + * Ecriture du tableau des vitesses dans un fichier + **/ +void scalar_field_2d_write_file(scalar_field_2d field, char *dir, char *prefix, int i) { + // Create filename + char integer_string[128]; + sprintf(integer_string, "%s%s%d", dir, prefix, i); + assert(strlen(integer_string) < 128); + char *filename = strcat(integer_string, ".dat"); + + // Open file + FILE *fp; + if ((fp = fopen(filename, "w")) == NULL) { + printf("Error ! opening file"); + exit(EXIT_FAILURE); + } + + // Fill file + for (fint ix = 0; ix < field.nx; ix++) { + for (fint iy = 0; iy < field.ny; iy++) { + fprintf(fp, "%f ", field.grid[ix][iy]); + } + } + + fclose(fp); +} diff --git a/presentations/pasc/code/c_interface/scalar_field_2d.h b/presentations/pasc/code/c_interface/scalar_field_2d.h new file mode 100644 index 0000000000000000000000000000000000000000..0ab22c5e5a430c7385bda192e8c2a8a381d42b01 --- /dev/null +++ b/presentations/pasc/code/c_interface/scalar_field_2d.h @@ -0,0 +1,22 @@ +#ifndef SCALAR_FIELD_2D_H +#define SCALAR_FIELD_2D_H + +#include "globals.h" + +typedef struct { + T *data; + T **grid; + fint nx, ny; +} scalar_field_2d; + +scalar_field_2d scalar_field_2d_allocate(fint nx, fint ny); + +void scalar_field_2d_deallocate(scalar_field_2d field); + +T scalar_field_2d_compute_min(scalar_field_2d field); + +T scalar_field_2d_compute_max(scalar_field_2d field); + +void scalar_field_2d_write_file(scalar_field_2d field, char *dir, char *prefix, int i); + +#endif \ No newline at end of file diff --git a/presentations/pasc/code/c_interface/tensor_field.c b/presentations/pasc/code/c_interface/tensor_field.c new file mode 100644 index 0000000000000000000000000000000000000000..1bfd6bcacb225fad95986fd94d5d7a9580abb61a --- /dev/null +++ b/presentations/pasc/code/c_interface/tensor_field.c @@ -0,0 +1,113 @@ +#include "tensor_field.h" +#include <math.h> +#include <stdlib.h> +#include <assert.h> + +bool tensor_field_consistant_spatial_size_tensor_field(tensor_field lhs, tensor_field rhs) { + return (rhs.nx == lhs.nx && rhs.ny == lhs.ny && rhs.nz == lhs.nz); +} + +bool tensor_field_consistant_size_tensor_field(tensor_field lhs, tensor_field rhs) { + return (rhs.nx == lhs.nx && rhs.ny == lhs.ny && rhs.nz == lhs.nz && rhs.q == lhs.q); +} + +bool tensor_field_consistant_size_scalar_field(tensor_field lhs, scalar_field rhs) { + return (rhs.nx == lhs.nx && rhs.ny == lhs.ny && rhs.nz == lhs.nz); +} + +tensor_field tensor_field_allocate(fint nx, fint ny, fint nz, fint q) { + tensor_field field; + field.nx = nx; + field.ny = ny; + field.nz = nz; + field.q = q; + field.dim = 3; + + field.data = malloc(sizeof(T) * nx * ny * nz * q); + field.grid = malloc(sizeof(T ***) * nx); + for (fint ix = 0; ix < nx; ++ix) { + field.grid[ix] = malloc(sizeof(T **) * ny); + for (fint iy = 0; iy < ny; ++iy) { + field.grid[ix][iy] = malloc(sizeof(T *) * nz); + for (fint iz = 0; iz < nz; ++iz) { + field.grid[ix][iy][iz] = field.data + q * (iz + nz * (iy + ny * ix)); + } + } + } + return field; +} + +void tensor_field_deallocate(tensor_field field) { + free(field.data); + + for (fint ix = 0; ix < field.nx; ++ix) { + for (fint iy = 0; iy < field.ny; ++iy) { + free(field.grid[ix][iy]); + } + free(field.grid[ix]); + } + free(field.grid); +} + +void tensor_field_from_futhark_f32_4d_inplace(struct futhark_f32_4d *f, struct futhark_context *ctx, tensor_field field) { + int64_t *shape = futhark_shape_f32_4d(ctx, f); + fint nx = shape[0]; + fint ny = shape[1]; + fint nz = shape[2]; + fint q = shape[3]; + + assert(nx == field.nx); + assert(ny == field.ny); + assert(nz == field.nz); + assert(q == field.q); + + futhark_values_f32_4d(ctx, f, field.data); +} + +void tensor_field_compute_norm_inplace(tensor_field field, scalar_field norm) { + assert(norm.nx == field.nx); + assert(norm.ny == field.ny); + assert(norm.nz == field.nz); + + for (int ix = 0; ix < field.nx; ++ix) { + for (int iy = 0; iy < field.ny; ++iy) { + for (int iz = 0; iz < field.nz; ++iz) { + T n = 0.0; + for (int id = 0; id < field.q; ++id) { + n += field.grid[ix][iy][iz][id] * field.grid[ix][iy][iz][id]; + } + norm.grid[ix][iy][iz] = sqrt(n); + } + } + } +} + +tensor_field tensor_field_from_futhark_f32_4d(struct futhark_f32_4d *f, struct futhark_context *ctx) { + int64_t *shape = futhark_shape_f32_4d(ctx, f); + fint nx = shape[0]; + fint ny = shape[1]; + fint nz = shape[2]; + fint q = shape[3]; + + tensor_field field = tensor_field_allocate(nx, ny, nz, q); + + tensor_field_from_futhark_f32_4d_inplace(f, ctx, field); + + return field; +} + +struct futhark_f32_4d *tensor_field_to_futhark_f32_4d(tensor_field field, struct futhark_context *ctx) { + + struct futhark_f32_4d *f = futhark_new_f32_4d(ctx, + field.data, field.nx, field.ny, field.nz, field.q); + return f; +} + +scalar_field tensor_field_compute_norm(tensor_field field) { + scalar_field norm = scalar_field_allocate(field.nx, field.ny, field.nz); + + tensor_field_compute_norm_inplace(field, norm); + + return norm; +} + diff --git a/presentations/pasc/code/c_interface/tensor_field.h b/presentations/pasc/code/c_interface/tensor_field.h new file mode 100644 index 0000000000000000000000000000000000000000..1cce5b8a98b09f9edaecee6cddcc9b06f84b6684 --- /dev/null +++ b/presentations/pasc/code/c_interface/tensor_field.h @@ -0,0 +1,33 @@ +#ifndef TENSOR_FIELD_H +#define TENSOR_FIELD_H + +#include "globals.h" +#include "scalar_field.h" +#include "../liblbm.h" + +typedef struct { + T *data; + T ****grid; + fint nx, ny, nz, q; + fint dim; +} tensor_field; + +bool tensor_field_consistant_spatial_size_tensor_field(tensor_field lhs, tensor_field rhs); +bool tensor_field_consistant_size_tensor_field(tensor_field lhs, tensor_field rhs); +bool tensor_field_consistant_size_scalar_field(tensor_field lhs, scalar_field rhs); + +tensor_field tensor_field_allocate(fint nx, fint ny, fint nz, fint q); + +void tensor_field_deallocate(tensor_field field); + +void tensor_field_from_futhark_f32_4d_inplace(struct futhark_f32_4d *f_in, struct futhark_context *ctx, tensor_field field); +void tensor_field_compute_norm_inplace(tensor_field field, scalar_field norm); + +tensor_field tensor_field_from_futhark_f32_4d(struct futhark_f32_4d *f_in, struct futhark_context *ctx); +struct futhark_f32_4d *tensor_field_to_futhark_f32_4d(tensor_field field, struct futhark_context *ctx); + +scalar_field tensor_field_compute_norm(tensor_field field); + + + +#endif \ No newline at end of file diff --git a/presentations/pasc/code/c_interface/tensor_field_2d.c b/presentations/pasc/code/c_interface/tensor_field_2d.c new file mode 100644 index 0000000000000000000000000000000000000000..da1a802845c6daba809fc0f7c6a9076f08a38eb3 --- /dev/null +++ b/presentations/pasc/code/c_interface/tensor_field_2d.c @@ -0,0 +1,48 @@ +#include "tensor_field_2d.h" +#include <math.h> +#include <stdlib.h> + +tensor_field_2d tensor_field_2d_allocate(fint nx, fint ny, fint q) { + tensor_field_2d field; + field.nx = nx; + field.ny = ny; + field.q = q; + field.dim = 2; + + field.data = malloc(sizeof(T) * nx * ny * q); + field.grid = malloc(sizeof(T **) * nx); + for (fint ix = 0; ix < nx; ++ix) { + field.grid[ix] = malloc(sizeof(T *) * ny); + for (fint iy = 0; iy < ny; ++iy) { + field.grid[ix][iy] = field.data + q * (iy + ny * ix); + } + } + return field; +} + +void tensor_field_2d_deallocate(tensor_field_2d field) { + free(field.data); + + for (fint ix = 0; ix < field.nx; ++ix) { + for (fint iy = 0; iy < field.ny; ++iy) { + free(field.grid[ix][iy]); + } + free(field.grid[ix]); + } +} + +scalar_field_2d tensor_field_2d_compute_norm(tensor_field_2d field) { + scalar_field_2d norm = scalar_field_2d_allocate(field.nx, field.ny); + + for (int ix = 0; ix < field.nx; ++ix) { + for (int iy = 0; iy < field.ny; ++iy) { + T n = 0.0; + for (int id = 0; id < field.q; ++id) { + n += field.grid[ix][iy][id] * field.grid[ix][iy][id]; + } + norm.grid[ix][iy] = sqrt(n); + } + } + return norm; +} + diff --git a/presentations/pasc/code/c_interface/tensor_field_2d.h b/presentations/pasc/code/c_interface/tensor_field_2d.h new file mode 100644 index 0000000000000000000000000000000000000000..86f3857ac9a46bcde21e1f7e1fc48bd689804c18 --- /dev/null +++ b/presentations/pasc/code/c_interface/tensor_field_2d.h @@ -0,0 +1,22 @@ +#ifndef TENSOR_FIELD_2D_H +#define TENSOR_FIELD_2D_H + +#include "globals.h" +#include "scalar_field_2d.h" +#include "../liblbm.h" + +typedef struct { + T *data; + T ***grid; + fint nx, ny, q; + fint dim; +} tensor_field_2d; + +tensor_field_2d tensor_field_2d_allocate(fint nx, fint ny, fint q); + +void tensor_field_2d_deallocate(tensor_field_2d field); + +scalar_field_2d tensor_field_2d_compute_norm(tensor_field_2d field); + + +#endif \ No newline at end of file diff --git a/presentations/pasc/code/c_interface/units.c b/presentations/pasc/code/c_interface/units.c new file mode 100644 index 0000000000000000000000000000000000000000..dc04fff385cb2003174f6a4765c6c80106c7929f --- /dev/null +++ b/presentations/pasc/code/c_interface/units.c @@ -0,0 +1,53 @@ +#include <assert.h> +#include <stdio.h> +#include "units.h" +#include "utils.h" +#include "lbm_utils.h" + +units_converter units_converter_create_inertial(T Re, T l_phys, fint N, T u_phys, T u_lb, T lx, T ly, T lz) { + units_converter units; + + // imposed quantities + units.Re = Re; + units.l_phys = l_phys; + units.N = N; + units.u_phys = u_phys; + units.u_lb = u_lb; + units.lx = lx; + units.ly = ly; + units.lz = lz; + + // derived quantities phys + units.nu_phys = units.u_phys * units.l_phys / units.Re; + + + // derived quantities lb + units.delta_x = units.l_phys / N; + units.delta_t = units.delta_x * units.u_lb / units.u_phys; + units.nu_lb = units.u_lb * units.N / units.Re; + assert(is_approx_equal(units.nu_lb * sqr_f(units.delta_x) / units.delta_t, units.nu_phys, 1.0e-7)); + T tau = units.nu_lb / cs2 + 0.5; + units.omega = (T)1 / tau; + + + units.nx = (fint)(round_to_int(units.lx * N) + 1); + units.ny = (fint)(round_to_int(units.ly * N)+ 1); + units.nz = (fint)(round_to_int(units.lz * N) + 1); + + return units; +} + +void units_converter_print(units_converter units) { + printf("==========================================\n"); + printf(" Parameters \n"); + printf("==========================================\n"); + printf("\n"); + printf("Re = %f\n", units.Re); + printf("l_phys = %f, u_phys = %f, nu_phys = %f\n", units.l_phys, units.u_phys, units.nu_phys); + printf("N = %d\n", units.N); + printf("u_lb = %f, nu_lb = %f\n", units.u_lb, units.nu_lb); + printf("omega = %f\n", units.omega); + printf("lx = %f, ly = %f, lz = %f\n", units.lx, units.ly, units.lz); + printf("delta_x = %f, delta_t = %f\n", units.delta_x, units.delta_t); + printf("nx = %d, ny = %d, nz = %d\n", units.nx, units.ny, units.nz); +} diff --git a/presentations/pasc/code/c_interface/units.h b/presentations/pasc/code/c_interface/units.h new file mode 100644 index 0000000000000000000000000000000000000000..13f2f7526e7f5e756bce262d0918e62e517e17f3 --- /dev/null +++ b/presentations/pasc/code/c_interface/units.h @@ -0,0 +1,21 @@ +#ifndef UNITS_H +#define UNITS_H + +#include "globals.h" + +typedef struct { + T Re; + T l_phys, u_phys, nu_phys; + fint N; + T u_lb, nu_lb; + T omega; + T lx, ly, lz; + T delta_x, delta_t; + fint nx, ny, nz; +} units_converter; + +units_converter units_converter_create_inertial(T Re, T l_phys, fint N, T u_phys, T u_lb, T lx, T ly, T lz); + +void units_converter_print(units_converter units); + +#endif \ No newline at end of file diff --git a/presentations/pasc/code/c_interface/utils.c b/presentations/pasc/code/c_interface/utils.c new file mode 100644 index 0000000000000000000000000000000000000000..14cb99850de8f5c31643ceb823a31a5c2f9957ee --- /dev/null +++ b/presentations/pasc/code/c_interface/utils.c @@ -0,0 +1,41 @@ +#include <dirent.h> +#include <errno.h> +#include <assert.h> +#include "utils.h" + +fint round_to_int(T a) { + return (fint)(a + 0.5); +} + + +T sqr_f(T a) { + return a * a; +} + +fint sqr_i(fint a) { + return a * a; +} + +fint max(fint a, fint b) { + if (a > b) { + return a; + } + return b; +} + +bool is_approx_equal(T a, T b, T eps) { + return (fabs(a - b) < eps); +} + +void check_dir(const char * const prefix) { + DIR* dir = opendir(prefix); + if (dir) { + /* Directory exists. */ + closedir(dir); + } else if (ENOENT == errno) { + assert(false && "Error directory does not exist."); + } else { + assert(false && "Error opendir failed."); + } +} + diff --git a/presentations/pasc/code/c_interface/utils.h b/presentations/pasc/code/c_interface/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..a3178d4f00b7a542c09997410fdd748d45d66db3 --- /dev/null +++ b/presentations/pasc/code/c_interface/utils.h @@ -0,0 +1,19 @@ +#ifndef UTILS_H +#define UTILS_H + +#include "globals.h" +#include <stdbool.h> + +fint round_to_int(T a); + +T sqr_f(T a); + +fint sqr_i(fint a); + +fint max(fint a, fint b); + +bool is_approx_equal(T a, T b, T eps); + +void check_dir(const char *const prefix); + +#endif diff --git a/presentations/pasc/code/c_interface/vtk.c b/presentations/pasc/code/c_interface/vtk.c new file mode 100644 index 0000000000000000000000000000000000000000..db0913c0bca718f6cf90d58cdf7e712a35bfd455 --- /dev/null +++ b/presentations/pasc/code/c_interface/vtk.c @@ -0,0 +1,192 @@ +#include <stdbool.h> +#include <stdio.h> +#include <assert.h> +#include <stdlib.h> +#include <string.h> +#include "box.h" +#include "point.h" +#include "vtk.h" +#include "scalar_field.h" +#include "tensor_field.h" + +static void write_header(box domain, point origin, double delta_x, bool point_data, FILE *fp); +static void start_piece(box domain, bool point_data, FILE *fp); +static void end_piece(bool point_data, FILE *fp); +static void write_footer(FILE *fp); +static void vtk_file_write_header(vtk_file *vtk); +static void write_scalar_field(scalar_field field, char *name, FILE *fp); +static void write_tensor_field(tensor_field field, char *name, FILE *fp); +static char *get_float_type(); + +char *vtk_create_fname(char const * const prefix, char *name, fint i) { + char *buffer = malloc(80 * sizeof(char)); + sprintf(buffer, "%s%s_%08d.vti", prefix, name, i); + assert(strlen(buffer) < 80 && "Filename too long."); + return buffer; +} + + +vtk_file vtk_file_create(char *fname, box domain, point origin, T delta_x, bool point_data) { + vtk_file vtk; + vtk.domain = domain; + vtk.origin = origin; + vtk.delta_x = delta_x; + vtk.point_data = point_data; + vtk.header_written = false; + + FILE *file = fopen (fname,"w"); + assert(file != 0 && "Could not open file."); + vtk.fp = file; + + return vtk; +} + +vtk_file vtk_file_create_default(char *fname, box domain, T delta_x) { + point offset = point_create_default(); + return vtk_file_create(fname, domain, offset, delta_x, true); +} + +void vtk_file_write_scalar_field(vtk_file *vtk, scalar_field field, char *name) { + vtk_file_write_header(vtk); + write_scalar_field(field, name, vtk->fp); +} + +void vtk_file_write_tensor_field(vtk_file *vtk, tensor_field field, char *name) { + vtk_file_write_header(vtk); + write_tensor_field(field, name, vtk->fp); +} + +void vtk_file_close(vtk_file *vtk) { + assert(vtk->header_written && "Trying to close a file where nothing has been written"); + end_piece(vtk->point_data, vtk->fp); + write_footer(vtk->fp); + fint is_closed = fclose(vtk->fp); + assert(is_closed == 0 && "Error file not closed properly."); + vtk->header_written = false; +} + +static void vtk_file_write_header(vtk_file *vtk) { + if (!vtk->header_written) { + write_header(vtk->domain, vtk->origin, vtk->delta_x, vtk->point_data, vtk->fp); + start_piece(vtk->domain, vtk->point_data, vtk->fp); + vtk->header_written = true; + } +} + +static void write_scalar_field(scalar_field field, char *name, FILE *fp) { + assert(fp != 0 && "File not open."); + + char *flt = get_float_type(); + assert(flt != NULL && "Not a float"); + + fprintf(fp, "<DataArray type=\"%s\" Name=\"%s\" format=\"ascii\" NumberOfComponents=\"%d\">\n", flt, name, 1); + for (fint ix = 0; ix < field.nx; ++ix) { + for (fint iy = 0; iy < field.ny; ++iy) { + for (fint iz = 0; iz < field.nz; ++iz) { + // fprintf(fp, "%f ", field.grid[ix][iy][iz]); + + if (strcmp(flt, "Float32") == 0) { + fprintf(fp, "%.9g ", field.grid[ix][iy][iz]); + } else if (strcmp(flt, "Float64") == 0) { + fprintf(fp, "%.17g ", field.grid[ix][iy][iz]); + } else { + assert(false && "Error with the flt_type."); + } + } + } + } + fprintf(fp, "\n</DataArray>\n"); +} + +static void write_tensor_field(tensor_field field, char *name, FILE *fp) { + assert(fp != 0 && "File not open."); + + char *flt = get_float_type(); + assert(flt != NULL && "Not a float"); + + fprintf(fp, "<DataArray type=\"%s\" Name=\"%s\" format=\"ascii\" NumberOfComponents=\"%d\">\n", flt, name, field.q); + for (fint ix = 0; ix < field.nx; ++ix) { + for (fint iy = 0; iy < field.ny; ++iy) { + for (fint iz = 0; iz < field.nz; ++iz) { + for (fint iq = 0; iq < field.q; ++iq) { + // fprintf(fp, "%f ", field.grid[ix][iy][iz][iq]); + + if (strcmp(flt, "Float32") == 0) { + fprintf(fp, "%.9g ", field.grid[ix][iy][iz][iq]); + } else if (strcmp(flt, "Float64") == 0) { + fprintf(fp, "%.17g ", field.grid[ix][iy][iz][iq]); + } else { + assert(false && "Error with the flt_type."); + } + } + } + } + } + fprintf(fp, "\n</DataArray>\n"); +} + +static void write_header(box domain, point origin, double delta_x, bool point_data, FILE *fp) { + assert(fp != 0 && "File not open."); + + fprintf(fp, "<?xml version=\"1.0\"?>\n"); + fprintf(fp, "<VTKFile type=\"ImageData\" version=\"0.1\" byte_order=\"LittleEndian\">\n"); + if (!point_data) { + origin.x -= delta_x/2.; + origin.y -= delta_x/2.; + origin.z -= delta_x/2.; + domain.x1++; + domain.y1++; + domain.z1++; + } + + fprintf(fp, "<ImageData WholeExtent=\"%d %d %d %d %d %d\" ", domain.x0, domain.x1, domain.y0, domain.y1, domain.z0, domain.z1); + fprintf(fp, "Origin=\"%f %f %f\" ", origin.x, origin.y, origin.z); + fprintf(fp, "Spacing=\"%f %f %f\">\n", delta_x, delta_x, delta_x); +} + +static void start_piece(box domain, bool point_data, FILE *fp) { + assert(fp != 0 && "File not open."); + + if (!point_data) { + domain.x1++; + domain.y1++; + domain.z1++; + } + fprintf(fp, "<Piece Extent=\"%d %d %d %d %d %d\">\n", domain.x0, domain.x1, domain.y0, domain.y1, domain.z0, domain.z1); + + if (point_data) { + fprintf(fp, "<PointData>\n"); + } else { + fprintf(fp, "<CellData>\n"); + } +} + +static void end_piece(bool point_data, FILE *fp) { + assert(fp != 0 && "File not open."); + + if (point_data) { + fprintf(fp, "</PointData>\n"); + } + else { + fprintf(fp, "</CellData>\n"); + } + fprintf(fp, "</Piece>\n"); +} + +static void write_footer(FILE *fp) { + assert(fp != 0 && "File not open."); + + fprintf(fp, "</ImageData>\n"); + fprintf(fp, "</VTKFile>\n"); +} + +static char *get_float_type() { + if (8 == sizeof(T)) { + return "Float64"; + } else if (4 == sizeof(T)) { + return "Float32"; + } else { + return NULL; + } +} + diff --git a/presentations/pasc/code/c_interface/vtk.h b/presentations/pasc/code/c_interface/vtk.h new file mode 100644 index 0000000000000000000000000000000000000000..f26fbe0b83e2b7e6908f3bbe6d6bcd599c654345 --- /dev/null +++ b/presentations/pasc/code/c_interface/vtk.h @@ -0,0 +1,32 @@ +#ifndef VTK_H +#define VTK_H + +#include <stdio.h> +#include <stdbool.h> +#include "box.h" +#include "point.h" +#include "scalar_field.h" +#include "tensor_field.h" + +typedef struct { + FILE *fp; + box domain; + point origin; + T delta_x; + bool point_data; + bool header_written; +} vtk_file; + +vtk_file vtk_file_create(char *fname, box domain, point origin, T delta_x, bool point_data); + +vtk_file vtk_file_create_default(char *fname, box domain, T delta_x); + +void vtk_file_write_scalar_field(vtk_file *vtk, scalar_field field, char *name); + +void vtk_file_write_tensor_field(vtk_file *vtk, tensor_field field, char *name); + +void vtk_file_close(vtk_file *vtk); + +char *vtk_create_fname(char const * const prefix, char *name, fint i); + +#endif \ No newline at end of file diff --git a/presentations/pasc/code/data_analysis.c b/presentations/pasc/code/data_analysis.c new file mode 100644 index 0000000000000000000000000000000000000000..283350a3603da835bb44593014a5c139d92dcf1e --- /dev/null +++ b/presentations/pasc/code/data_analysis.c @@ -0,0 +1,2964 @@ +#ifdef __GNUC__ +#pragma GCC diagnostic ignored "-Wunused-function" +#pragma GCC diagnostic ignored "-Wunused-variable" +#pragma GCC diagnostic ignored "-Wparentheses" +#pragma GCC diagnostic ignored "-Wunused-label" +#endif +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wunused-function" +#pragma clang diagnostic ignored "-Wunused-variable" +#pragma clang diagnostic ignored "-Wparentheses" +#pragma clang diagnostic ignored "-Wunused-label" +#endif +// Headers + +#define _GNU_SOURCE +#include <stdint.h> +#include <stddef.h> +#include <stdbool.h> +#include <float.h> + + +// Initialisation + +struct futhark_context_config ; +struct futhark_context_config *futhark_context_config_new(void); +void futhark_context_config_free(struct futhark_context_config *cfg); +void futhark_context_config_set_debugging(struct futhark_context_config *cfg, + int flag); +void futhark_context_config_set_logging(struct futhark_context_config *cfg, + int flag); +struct futhark_context ; +struct futhark_context *futhark_context_new(struct futhark_context_config *cfg); +void futhark_context_free(struct futhark_context *ctx); + +// Arrays + + +// Opaque values + + +// Entry points + + +// Miscellaneous + +int futhark_context_sync(struct futhark_context *ctx); +int futhark_context_clear_caches(struct futhark_context *ctx); +char *futhark_context_report(struct futhark_context *ctx); +char *futhark_context_get_error(struct futhark_context *ctx); +void futhark_context_pause_profiling(struct futhark_context *ctx); +void futhark_context_unpause_profiling(struct futhark_context *ctx); +#define FUTHARK_BACKEND_c +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <math.h> +#include <stdint.h> +#undef NDEBUG +#include <assert.h> +#include <stdarg.h> +// Start of util.h. +// +// Various helper functions that are useful in all generated C code. + +#include <errno.h> +#include <string.h> + +static const char *fut_progname = "(embedded Futhark)"; + +static void futhark_panic(int eval, const char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + fprintf(stderr, "%s: ", fut_progname); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(eval); +} + +// For generating arbitrary-sized error messages. It is the callers +// responsibility to free the buffer at some point. +static char* msgprintf(const char *s, ...) { + va_list vl; + va_start(vl, s); + size_t needed = 1 + (size_t)vsnprintf(NULL, 0, s, vl); + char *buffer = (char*) malloc(needed); + va_start(vl, s); // Must re-init. + vsnprintf(buffer, needed, s, vl); + return buffer; +} + + +static inline void check_err(int errval, int sets_errno, const char *fun, int line, + const char *msg, ...) { + if (errval) { + char errnum[10]; + + va_list vl; + va_start(vl, msg); + + fprintf(stderr, "ERROR: "); + vfprintf(stderr, msg, vl); + fprintf(stderr, " in %s() at line %d with error code %s\n", + fun, line, + sets_errno ? strerror(errno) : errnum); + exit(errval); + } +} + +#define CHECK_ERR(err, msg...) check_err(err, 0, __func__, __LINE__, msg) +#define CHECK_ERRNO(err, msg...) check_err(err, 1, __func__, __LINE__, msg) + +// Read a file into a NUL-terminated string; returns NULL on error. +static void* slurp_file(const char *filename, size_t *size) { + unsigned char *s; + FILE *f = fopen(filename, "rb"); // To avoid Windows messing with linebreaks. + if (f == NULL) return NULL; + fseek(f, 0, SEEK_END); + size_t src_size = ftell(f); + fseek(f, 0, SEEK_SET); + s = (unsigned char*) malloc(src_size + 1); + if (fread(s, 1, src_size, f) != src_size) { + free(s); + s = NULL; + } else { + s[src_size] = '\0'; + } + fclose(f); + + if (size) { + *size = src_size; + } + + return s; +} + +// Dump 'n' bytes from 'buf' into the file at the designated location. +// Returns 0 on success. +static int dump_file(const char *file, const void *buf, size_t n) { + FILE *f = fopen(file, "w"); + + if (f == NULL) { + return 1; + } + + if (fwrite(buf, sizeof(char), n, f) != n) { + return 1; + } + + if (fclose(f) != 0) { + return 1; + } + + return 0; +} + +struct str_builder { + char *str; + size_t capacity; // Size of buffer. + size_t used; // Bytes used, *not* including final zero. +}; + +static void str_builder_init(struct str_builder *b) { + b->capacity = 10; + b->used = 0; + b->str = malloc(b->capacity); + b->str[0] = 0; +} + +static void str_builder(struct str_builder *b, const char *s, ...) { + va_list vl; + va_start(vl, s); + size_t needed = (size_t)vsnprintf(NULL, 0, s, vl); + + while (b->capacity < b->used + needed + 1) { + b->capacity *= 2; + b->str = realloc(b->str, b->capacity); + } + + va_start(vl, s); // Must re-init. + vsnprintf(b->str+b->used, b->capacity-b->used, s, vl); + b->used += needed; +} + +// End of util.h. + +// Start of timing.h. + +// The function get_wall_time() returns the wall time in microseconds +// (with an unspecified offset). + +#ifdef _WIN32 + +#include <windows.h> + +static int64_t get_wall_time(void) { + LARGE_INTEGER time,freq; + assert(QueryPerformanceFrequency(&freq)); + assert(QueryPerformanceCounter(&time)); + return ((double)time.QuadPart / freq.QuadPart) * 1000000; +} + +#else +// Assuming POSIX + +#include <time.h> +#include <sys/time.h> + +static int64_t get_wall_time(void) { + struct timeval time; + assert(gettimeofday(&time,NULL) == 0); + return time.tv_sec * 1000000 + time.tv_usec; +} + +static int64_t get_wall_time_ns(void) { + struct timespec time; + assert(clock_gettime(CLOCK_REALTIME, &time) == 0); + return time.tv_sec * 1000000000 + time.tv_nsec; +} + +#endif + +// End of timing.h. + +#include <string.h> +#include <inttypes.h> +#include <errno.h> +#include <ctype.h> +#include <errno.h> +#include <getopt.h> +// Start of values.h. + +//// Text I/O + +typedef int (*writer)(FILE*, const void*); +typedef int (*bin_reader)(void*); +typedef int (*str_reader)(const char *, void*); + +struct array_reader { + char* elems; + int64_t n_elems_space; + int64_t elem_size; + int64_t n_elems_used; + int64_t *shape; + str_reader elem_reader; +}; + +static void skipspaces(FILE *f) { + int c; + do { + c = getc(f); + } while (isspace(c)); + + if (c != EOF) { + ungetc(c, f); + } +} + +static int constituent(char c) { + return isalnum(c) || c == '.' || c == '-' || c == '+' || c == '_'; +} + +// Produces an empty token only on EOF. +static void next_token(FILE *f, char *buf, int bufsize) { + start: + skipspaces(f); + + int i = 0; + while (i < bufsize) { + int c = getc(f); + buf[i] = (char)c; + + if (c == EOF) { + buf[i] = 0; + return; + } else if (c == '-' && i == 1 && buf[0] == '-') { + // Line comment, so skip to end of line and start over. + for (; c != '\n' && c != EOF; c = getc(f)); + goto start; + } else if (!constituent((char)c)) { + if (i == 0) { + // We permit single-character tokens that are not + // constituents; this lets things like ']' and ',' be + // tokens. + buf[i+1] = 0; + return; + } else { + ungetc(c, f); + buf[i] = 0; + return; + } + } + + i++; + } + + buf[bufsize-1] = 0; +} + +static int next_token_is(FILE *f, char *buf, int bufsize, const char* expected) { + next_token(f, buf, bufsize); + return strcmp(buf, expected) == 0; +} + +static void remove_underscores(char *buf) { + char *w = buf; + + for (char *r = buf; *r; r++) { + if (*r != '_') { + *w++ = *r; + } + } + + *w++ = 0; +} + +static int read_str_elem(char *buf, struct array_reader *reader) { + int ret; + if (reader->n_elems_used == reader->n_elems_space) { + reader->n_elems_space *= 2; + reader->elems = (char*) realloc(reader->elems, + (size_t)(reader->n_elems_space * reader->elem_size)); + } + + ret = reader->elem_reader(buf, reader->elems + reader->n_elems_used * reader->elem_size); + + if (ret == 0) { + reader->n_elems_used++; + } + + return ret; +} + +static int read_str_array_elems(FILE *f, + char *buf, int bufsize, + struct array_reader *reader, int64_t dims) { + int ret; + int first = 1; + char *knows_dimsize = (char*) calloc((size_t)dims, sizeof(char)); + int cur_dim = dims-1; + int64_t *elems_read_in_dim = (int64_t*) calloc((size_t)dims, sizeof(int64_t)); + + while (1) { + next_token(f, buf, bufsize); + + if (strcmp(buf, "]") == 0) { + if (knows_dimsize[cur_dim]) { + if (reader->shape[cur_dim] != elems_read_in_dim[cur_dim]) { + ret = 1; + break; + } + } else { + knows_dimsize[cur_dim] = 1; + reader->shape[cur_dim] = elems_read_in_dim[cur_dim]; + } + if (cur_dim == 0) { + ret = 0; + break; + } else { + cur_dim--; + elems_read_in_dim[cur_dim]++; + } + } else if (strcmp(buf, ",") == 0) { + next_token(f, buf, bufsize); + if (strcmp(buf, "[") == 0) { + if (cur_dim == dims - 1) { + ret = 1; + break; + } + first = 1; + cur_dim++; + elems_read_in_dim[cur_dim] = 0; + } else if (cur_dim == dims - 1) { + ret = read_str_elem(buf, reader); + if (ret != 0) { + break; + } + elems_read_in_dim[cur_dim]++; + } else { + ret = 1; + break; + } + } else if (strlen(buf) == 0) { + // EOF + ret = 1; + break; + } else if (first) { + if (strcmp(buf, "[") == 0) { + if (cur_dim == dims - 1) { + ret = 1; + break; + } + cur_dim++; + elems_read_in_dim[cur_dim] = 0; + } else { + ret = read_str_elem(buf, reader); + if (ret != 0) { + break; + } + elems_read_in_dim[cur_dim]++; + first = 0; + } + } else { + ret = 1; + break; + } + } + + free(knows_dimsize); + free(elems_read_in_dim); + return ret; +} + +static int read_str_empty_array(FILE *f, char *buf, int bufsize, + const char *type_name, int64_t *shape, int64_t dims) { + if (strlen(buf) == 0) { + // EOF + return 1; + } + + if (strcmp(buf, "empty") != 0) { + return 1; + } + + if (!next_token_is(f, buf, bufsize, "(")) { + return 1; + } + + for (int i = 0; i < dims; i++) { + if (!next_token_is(f, buf, bufsize, "[")) { + return 1; + } + + next_token(f, buf, bufsize); + + if (sscanf(buf, "%"SCNu64, (uint64_t*)&shape[i]) != 1) { + return 1; + } + + if (!next_token_is(f, buf, bufsize, "]")) { + return 1; + } + } + + if (!next_token_is(f, buf, bufsize, type_name)) { + return 1; + } + + + if (!next_token_is(f, buf, bufsize, ")")) { + return 1; + } + + // Check whether the array really is empty. + for (int i = 0; i < dims; i++) { + if (shape[i] == 0) { + return 0; + } + } + + // Not an empty array! + return 1; +} + +static int read_str_array(FILE *f, + int64_t elem_size, str_reader elem_reader, + const char *type_name, + void **data, int64_t *shape, int64_t dims) { + int ret; + struct array_reader reader; + char buf[100]; + + int dims_seen; + for (dims_seen = 0; dims_seen < dims; dims_seen++) { + if (!next_token_is(f, buf, sizeof(buf), "[")) { + break; + } + } + + if (dims_seen == 0) { + return read_str_empty_array(f, buf, sizeof(buf), type_name, shape, dims); + } + + if (dims_seen != dims) { + return 1; + } + + reader.shape = shape; + reader.n_elems_used = 0; + reader.elem_size = elem_size; + reader.n_elems_space = 16; + reader.elems = (char*) realloc(*data, (size_t)(elem_size*reader.n_elems_space)); + reader.elem_reader = elem_reader; + + ret = read_str_array_elems(f, buf, sizeof(buf), &reader, dims); + + *data = reader.elems; + + return ret; +} + +#define READ_STR(MACRO, PTR, SUFFIX) \ + remove_underscores(buf); \ + int j; \ + if (sscanf(buf, "%"MACRO"%n", (PTR*)dest, &j) == 1) { \ + return !(strcmp(buf+j, "") == 0 || strcmp(buf+j, SUFFIX) == 0); \ + } else { \ + return 1; \ + } + +static int read_str_i8(char *buf, void* dest) { + // Some platforms (WINDOWS) does not support scanf %hhd or its + // cousin, %SCNi8. Read into int first to avoid corrupting + // memory. + // + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63417 + remove_underscores(buf); + int j, x; + if (sscanf(buf, "%i%n", &x, &j) == 1) { + *(int8_t*)dest = (int8_t)x; + return !(strcmp(buf+j, "") == 0 || strcmp(buf+j, "i8") == 0); + } else { + return 1; + } +} + +static int read_str_u8(char *buf, void* dest) { + // Some platforms (WINDOWS) does not support scanf %hhd or its + // cousin, %SCNu8. Read into int first to avoid corrupting + // memory. + // + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63417 + remove_underscores(buf); + int j, x; + if (sscanf(buf, "%i%n", &x, &j) == 1) { + *(uint8_t*)dest = (uint8_t)x; + return !(strcmp(buf+j, "") == 0 || strcmp(buf+j, "u8") == 0); + } else { + return 1; + } +} + +static int read_str_i16(char *buf, void* dest) { + READ_STR(SCNi16, int16_t, "i16"); +} + +static int read_str_u16(char *buf, void* dest) { + READ_STR(SCNi16, int16_t, "u16"); +} + +static int read_str_i32(char *buf, void* dest) { + READ_STR(SCNi32, int32_t, "i32"); +} + +static int read_str_u32(char *buf, void* dest) { + READ_STR(SCNi32, int32_t, "u32"); +} + +static int read_str_i64(char *buf, void* dest) { + READ_STR(SCNi64, int64_t, "i64"); +} + +static int read_str_u64(char *buf, void* dest) { + // FIXME: This is not correct, as SCNu64 only permits decimal + // literals. However, SCNi64 does not handle very large numbers + // correctly (it's really for signed numbers, so that's fair). + READ_STR(SCNu64, uint64_t, "u64"); +} + +static int read_str_f32(char *buf, void* dest) { + remove_underscores(buf); + if (strcmp(buf, "f32.nan") == 0) { + *(float*)dest = NAN; + return 0; + } else if (strcmp(buf, "f32.inf") == 0) { + *(float*)dest = INFINITY; + return 0; + } else if (strcmp(buf, "-f32.inf") == 0) { + *(float*)dest = -INFINITY; + return 0; + } else { + READ_STR("f", float, "f32"); + } +} + +static int read_str_f64(char *buf, void* dest) { + remove_underscores(buf); + if (strcmp(buf, "f64.nan") == 0) { + *(double*)dest = NAN; + return 0; + } else if (strcmp(buf, "f64.inf") == 0) { + *(double*)dest = INFINITY; + return 0; + } else if (strcmp(buf, "-f64.inf") == 0) { + *(double*)dest = -INFINITY; + return 0; + } else { + READ_STR("lf", double, "f64"); + } +} + +static int read_str_bool(char *buf, void* dest) { + if (strcmp(buf, "true") == 0) { + *(char*)dest = 1; + return 0; + } else if (strcmp(buf, "false") == 0) { + *(char*)dest = 0; + return 0; + } else { + return 1; + } +} + +static int write_str_i8(FILE *out, int8_t *src) { + return fprintf(out, "%hhdi8", *src); +} + +static int write_str_u8(FILE *out, uint8_t *src) { + return fprintf(out, "%hhuu8", *src); +} + +static int write_str_i16(FILE *out, int16_t *src) { + return fprintf(out, "%hdi16", *src); +} + +static int write_str_u16(FILE *out, uint16_t *src) { + return fprintf(out, "%huu16", *src); +} + +static int write_str_i32(FILE *out, int32_t *src) { + return fprintf(out, "%di32", *src); +} + +static int write_str_u32(FILE *out, uint32_t *src) { + return fprintf(out, "%uu32", *src); +} + +static int write_str_i64(FILE *out, int64_t *src) { + return fprintf(out, "%"PRIi64"i64", *src); +} + +static int write_str_u64(FILE *out, uint64_t *src) { + return fprintf(out, "%"PRIu64"u64", *src); +} + +static int write_str_f32(FILE *out, float *src) { + float x = *src; + if (isnan(x)) { + return fprintf(out, "f32.nan"); + } else if (isinf(x) && x >= 0) { + return fprintf(out, "f32.inf"); + } else if (isinf(x)) { + return fprintf(out, "-f32.inf"); + } else { + return fprintf(out, "%.6ff32", x); + } +} + +static int write_str_f64(FILE *out, double *src) { + double x = *src; + if (isnan(x)) { + return fprintf(out, "f64.nan"); + } else if (isinf(x) && x >= 0) { + return fprintf(out, "f64.inf"); + } else if (isinf(x)) { + return fprintf(out, "-f64.inf"); + } else { + return fprintf(out, "%.6ff64", *src); + } +} + +static int write_str_bool(FILE *out, void *src) { + return fprintf(out, *(char*)src ? "true" : "false"); +} + +//// Binary I/O + +#define BINARY_FORMAT_VERSION 2 +#define IS_BIG_ENDIAN (!*(unsigned char *)&(uint16_t){1}) + +static void flip_bytes(int elem_size, unsigned char *elem) { + for (int j=0; j<elem_size/2; j++) { + unsigned char head = elem[j]; + int tail_index = elem_size-1-j; + elem[j] = elem[tail_index]; + elem[tail_index] = head; + } +} + +// On Windows we need to explicitly set the file mode to not mangle +// newline characters. On *nix there is no difference. +#ifdef _WIN32 +#include <io.h> +#include <fcntl.h> +static void set_binary_mode(FILE *f) { + setmode(fileno(f), O_BINARY); +} +#else +static void set_binary_mode(FILE *f) { + (void)f; +} +#endif + +static int read_byte(FILE *f, void* dest) { + int num_elems_read = fread(dest, 1, 1, f); + return num_elems_read == 1 ? 0 : 1; +} + +//// Types + +struct primtype_info_t { + const char binname[4]; // Used for parsing binary data. + const char* type_name; // Same name as in Futhark. + const int64_t size; // in bytes + const writer write_str; // Write in text format. + const str_reader read_str; // Read in text format. +}; + +static const struct primtype_info_t i8_info = + {.binname = " i8", .type_name = "i8", .size = 1, + .write_str = (writer)write_str_i8, .read_str = (str_reader)read_str_i8}; +static const struct primtype_info_t i16_info = + {.binname = " i16", .type_name = "i16", .size = 2, + .write_str = (writer)write_str_i16, .read_str = (str_reader)read_str_i16}; +static const struct primtype_info_t i32_info = + {.binname = " i32", .type_name = "i32", .size = 4, + .write_str = (writer)write_str_i32, .read_str = (str_reader)read_str_i32}; +static const struct primtype_info_t i64_info = + {.binname = " i64", .type_name = "i64", .size = 8, + .write_str = (writer)write_str_i64, .read_str = (str_reader)read_str_i64}; +static const struct primtype_info_t u8_info = + {.binname = " u8", .type_name = "u8", .size = 1, + .write_str = (writer)write_str_u8, .read_str = (str_reader)read_str_u8}; +static const struct primtype_info_t u16_info = + {.binname = " u16", .type_name = "u16", .size = 2, + .write_str = (writer)write_str_u16, .read_str = (str_reader)read_str_u16}; +static const struct primtype_info_t u32_info = + {.binname = " u32", .type_name = "u32", .size = 4, + .write_str = (writer)write_str_u32, .read_str = (str_reader)read_str_u32}; +static const struct primtype_info_t u64_info = + {.binname = " u64", .type_name = "u64", .size = 8, + .write_str = (writer)write_str_u64, .read_str = (str_reader)read_str_u64}; +static const struct primtype_info_t f32_info = + {.binname = " f32", .type_name = "f32", .size = 4, + .write_str = (writer)write_str_f32, .read_str = (str_reader)read_str_f32}; +static const struct primtype_info_t f64_info = + {.binname = " f64", .type_name = "f64", .size = 8, + .write_str = (writer)write_str_f64, .read_str = (str_reader)read_str_f64}; +static const struct primtype_info_t bool_info = + {.binname = "bool", .type_name = "bool", .size = 1, + .write_str = (writer)write_str_bool, .read_str = (str_reader)read_str_bool}; + +static const struct primtype_info_t* primtypes[] = { + &i8_info, &i16_info, &i32_info, &i64_info, + &u8_info, &u16_info, &u32_info, &u64_info, + &f32_info, &f64_info, + &bool_info, + NULL // NULL-terminated +}; + +// General value interface. All endian business taken care of at +// lower layers. + +static int read_is_binary(FILE *f) { + skipspaces(f); + int c = getc(f); + if (c == 'b') { + int8_t bin_version; + int ret = read_byte(f, &bin_version); + + if (ret != 0) { futhark_panic(1, "binary-input: could not read version.\n"); } + + if (bin_version != BINARY_FORMAT_VERSION) { + futhark_panic(1, "binary-input: File uses version %i, but I only understand version %i.\n", + bin_version, BINARY_FORMAT_VERSION); + } + + return 1; + } + ungetc(c, f); + return 0; +} + +static const struct primtype_info_t* read_bin_read_type_enum(FILE *f) { + char read_binname[4]; + + int num_matched = fscanf(f, "%4c", read_binname); + if (num_matched != 1) { futhark_panic(1, "binary-input: Couldn't read element type.\n"); } + + const struct primtype_info_t **type = primtypes; + + for (; *type != NULL; type++) { + // I compare the 4 characters manually instead of using strncmp because + // this allows any value to be used, also NULL bytes + if (memcmp(read_binname, (*type)->binname, 4) == 0) { + return *type; + } + } + futhark_panic(1, "binary-input: Did not recognize the type '%s'.\n", read_binname); + return NULL; +} + +static void read_bin_ensure_scalar(FILE *f, const struct primtype_info_t *expected_type) { + int8_t bin_dims; + int ret = read_byte(f, &bin_dims); + if (ret != 0) { futhark_panic(1, "binary-input: Couldn't get dims.\n"); } + + if (bin_dims != 0) { + futhark_panic(1, "binary-input: Expected scalar (0 dimensions), but got array with %i dimensions.\n", + bin_dims); + } + + const struct primtype_info_t *bin_type = read_bin_read_type_enum(f); + if (bin_type != expected_type) { + futhark_panic(1, "binary-input: Expected scalar of type %s but got scalar of type %s.\n", + expected_type->type_name, + bin_type->type_name); + } +} + +//// High-level interface + +static int read_bin_array(FILE *f, + const struct primtype_info_t *expected_type, void **data, int64_t *shape, int64_t dims) { + int ret; + + int8_t bin_dims; + ret = read_byte(f, &bin_dims); + if (ret != 0) { futhark_panic(1, "binary-input: Couldn't get dims.\n"); } + + if (bin_dims != dims) { + futhark_panic(1, "binary-input: Expected %i dimensions, but got array with %i dimensions.\n", + dims, bin_dims); + } + + const struct primtype_info_t *bin_primtype = read_bin_read_type_enum(f); + if (expected_type != bin_primtype) { + futhark_panic(1, "binary-input: Expected %iD-array with element type '%s' but got %iD-array with element type '%s'.\n", + dims, expected_type->type_name, dims, bin_primtype->type_name); + } + + int64_t elem_count = 1; + for (int i=0; i<dims; i++) { + int64_t bin_shape; + ret = fread(&bin_shape, sizeof(bin_shape), 1, f); + if (ret != 1) { + futhark_panic(1, "binary-input: Couldn't read size for dimension %i of array.\n", i); + } + if (IS_BIG_ENDIAN) { + flip_bytes(sizeof(bin_shape), (unsigned char*) &bin_shape); + } + elem_count *= bin_shape; + shape[i] = bin_shape; + } + + int64_t elem_size = expected_type->size; + void* tmp = realloc(*data, (size_t)(elem_count * elem_size)); + if (tmp == NULL) { + futhark_panic(1, "binary-input: Failed to allocate array of size %i.\n", + elem_count * elem_size); + } + *data = tmp; + + int64_t num_elems_read = (int64_t)fread(*data, (size_t)elem_size, (size_t)elem_count, f); + if (num_elems_read != elem_count) { + futhark_panic(1, "binary-input: tried to read %i elements of an array, but only got %i elements.\n", + elem_count, num_elems_read); + } + + // If we're on big endian platform we must change all multibyte elements + // from using little endian to big endian + if (IS_BIG_ENDIAN && elem_size != 1) { + flip_bytes(elem_size, (unsigned char*) *data); + } + + return 0; +} + +static int read_array(FILE *f, const struct primtype_info_t *expected_type, void **data, int64_t *shape, int64_t dims) { + if (!read_is_binary(f)) { + return read_str_array(f, expected_type->size, (str_reader)expected_type->read_str, expected_type->type_name, data, shape, dims); + } else { + return read_bin_array(f, expected_type, data, shape, dims); + } +} + +static int end_of_input(FILE *f) { + skipspaces(f); + char token[2]; + next_token(f, token, sizeof(token)); + if (strcmp(token, "") == 0) { + return 0; + } else { + return 1; + } +} + +static int write_str_array(FILE *out, + const struct primtype_info_t *elem_type, + const unsigned char *data, + const int64_t *shape, + int8_t rank) { + if (rank==0) { + elem_type->write_str(out, (void*)data); + } else { + int64_t len = (int64_t)shape[0]; + int64_t slice_size = 1; + + int64_t elem_size = elem_type->size; + for (int8_t i = 1; i < rank; i++) { + slice_size *= shape[i]; + } + + if (len*slice_size == 0) { + fprintf(out, "empty("); + for (int64_t i = 0; i < rank; i++) { + fprintf(out, "[%"PRIi64"]", shape[i]); + } + fprintf(out, "%s", elem_type->type_name); + fprintf(out, ")"); + } else if (rank==1) { + fputc('[', out); + for (int64_t i = 0; i < len; i++) { + elem_type->write_str(out, (void*) (data + i * elem_size)); + if (i != len-1) { + fprintf(out, ", "); + } + } + fputc(']', out); + } else { + fputc('[', out); + for (int64_t i = 0; i < len; i++) { + write_str_array(out, elem_type, data + i * slice_size * elem_size, shape+1, rank-1); + if (i != len-1) { + fprintf(out, ", "); + } + } + fputc(']', out); + } + } + return 0; +} + +static int write_bin_array(FILE *out, + const struct primtype_info_t *elem_type, + const unsigned char *data, + const int64_t *shape, + int8_t rank) { + int64_t num_elems = 1; + for (int64_t i = 0; i < rank; i++) { + num_elems *= shape[i]; + } + + fputc('b', out); + fputc((char)BINARY_FORMAT_VERSION, out); + fwrite(&rank, sizeof(int8_t), 1, out); + fwrite(elem_type->binname, 4, 1, out); + if (shape != NULL) { + fwrite(shape, sizeof(int64_t), (size_t)rank, out); + } + + if (IS_BIG_ENDIAN) { + for (int64_t i = 0; i < num_elems; i++) { + const unsigned char *elem = data+i*elem_type->size; + for (int64_t j = 0; j < elem_type->size; j++) { + fwrite(&elem[elem_type->size-j], 1, 1, out); + } + } + } else { + fwrite(data, (size_t)elem_type->size, (size_t)num_elems, out); + } + + return 0; +} + +static int write_array(FILE *out, int write_binary, + const struct primtype_info_t *elem_type, + const void *data, + const int64_t *shape, + const int8_t rank) { + if (write_binary) { + return write_bin_array(out, elem_type, data, shape, rank); + } else { + return write_str_array(out, elem_type, data, shape, rank); + } +} + +static int read_scalar(FILE *f, + const struct primtype_info_t *expected_type, void *dest) { + if (!read_is_binary(f)) { + char buf[100]; + next_token(f, buf, sizeof(buf)); + return expected_type->read_str(buf, dest); + } else { + read_bin_ensure_scalar(f, expected_type); + int64_t elem_size = expected_type->size; + int num_elems_read = fread(dest, (size_t)elem_size, 1, f); + if (IS_BIG_ENDIAN) { + flip_bytes(elem_size, (unsigned char*) dest); + } + return num_elems_read == 1 ? 0 : 1; + } +} + +static int write_scalar(FILE *out, int write_binary, const struct primtype_info_t *type, void *src) { + if (write_binary) { + return write_bin_array(out, type, src, NULL, 0); + } else { + return type->write_str(out, src); + } +} + +// End of values.h. + +static int binary_output = 0; +static FILE *runtime_file; +static int perform_warmup = 0; +static int num_runs = 1; +static const char *entry_point = "main"; +// Start of tuning.h. + +static char* load_tuning_file(const char *fname, + void *cfg, + int (*set_size)(void*, const char*, size_t)) { + const int max_line_len = 1024; + char* line = (char*) malloc(max_line_len); + + FILE *f = fopen(fname, "r"); + + if (f == NULL) { + snprintf(line, max_line_len, "Cannot open file: %s", strerror(errno)); + return line; + } + + int lineno = 0; + while (fgets(line, max_line_len, f) != NULL) { + lineno++; + char *eql = strstr(line, "="); + if (eql) { + *eql = 0; + int value = atoi(eql+1); + if (set_size(cfg, line, value) != 0) { + strncpy(eql+1, line, max_line_len-strlen(line)-1); + snprintf(line, max_line_len, "Unknown name '%s' on line %d.", eql+1, lineno); + return line; + } + } else { + snprintf(line, max_line_len, "Invalid line %d (must be of form 'name=int').", + lineno); + return line; + } + } + + free(line); + + return NULL; +} + +// End of tuning.h. + +int parse_options(struct futhark_context_config *cfg, int argc, + char *const argv[]) +{ + int ch; + static struct option long_options[] = {{"write-runtime-to", + required_argument, NULL, 1}, + {"runs", required_argument, NULL, 2}, + {"debugging", no_argument, NULL, 3}, + {"log", no_argument, NULL, 4}, + {"entry-point", required_argument, + NULL, 5}, {"binary-output", + no_argument, NULL, 6}, + {"help", no_argument, NULL, 7}, {0, + 0, + 0, + 0}}; + static char *option_descriptions = + " -t/--write-runtime-to FILE Print the time taken to execute the program to the indicated file, an integral number of microseconds.\n -r/--runs INT Perform NUM runs of the program.\n -D/--debugging Perform possibly expensive internal correctness checks and verbose logging.\n -L/--log Print various low-overhead logging information to stderr while running.\n -e/--entry-point NAME The entry point to run. Defaults to main.\n -b/--binary-output Print the program result in the binary output format.\n -h/--help Print help information and exit.\n"; + + while ((ch = getopt_long(argc, argv, ":t:r:DLe:bh", long_options, NULL)) != + -1) { + if (ch == 1 || ch == 't') { + runtime_file = fopen(optarg, "w"); + if (runtime_file == NULL) + futhark_panic(1, "Cannot open %s: %s\n", optarg, + strerror(errno)); + } + if (ch == 2 || ch == 'r') { + num_runs = atoi(optarg); + perform_warmup = 1; + if (num_runs <= 0) + futhark_panic(1, "Need a positive number of runs, not %s\n", + optarg); + } + if (ch == 3 || ch == 'D') + futhark_context_config_set_debugging(cfg, 1); + if (ch == 4 || ch == 'L') + futhark_context_config_set_logging(cfg, 1); + if (ch == 5 || ch == 'e') { + if (entry_point != NULL) + entry_point = optarg; + } + if (ch == 6 || ch == 'b') + binary_output = 1; + if (ch == 7 || ch == 'h') { + printf("Usage: %s [OPTION]...\nOptions:\n\n%s\nFor more information, consult the Futhark User's Guide or the man pages.\n", + fut_progname, option_descriptions); + exit(0); + } + if (ch == ':') + futhark_panic(-1, "Missing argument for option %s\n", argv[optind - + 1]); + if (ch == '?') { + fprintf(stderr, "Usage: %s: %s\n", fut_progname, + " -t/--write-runtime-to FILE Print the time taken to execute the program to the indicated file, an integral number of microseconds.\n -r/--runs INT Perform NUM runs of the program.\n -D/--debugging Perform possibly expensive internal correctness checks and verbose logging.\n -L/--log Print various low-overhead logging information to stderr while running.\n -e/--entry-point NAME The entry point to run. Defaults to main.\n -b/--binary-output Print the program result in the binary output format.\n -h/--help Print help information and exit.\n"); + futhark_panic(1, "Unknown option: %s\n", argv[optind - 1]); + } + } + return optind; +} +typedef void entry_point_fun(struct futhark_context *); +struct entry_point_entry { + const char *name; + entry_point_fun *fun; +} ; +int main(int argc, char **argv) +{ + fut_progname = argv[0]; + + struct entry_point_entry entry_points[] = {}; + struct futhark_context_config *cfg = futhark_context_config_new(); + + assert(cfg != NULL); + + int parsed_options = parse_options(cfg, argc, argv); + + argc -= parsed_options; + argv += parsed_options; + if (argc != 0) + futhark_panic(1, "Excess non-option: %s\n", argv[0]); + + struct futhark_context *ctx = futhark_context_new(cfg); + + assert(ctx != NULL); + + char *error = futhark_context_get_error(ctx); + + if (error != NULL) + futhark_panic(1, "%s", error); + if (entry_point != NULL) { + int num_entry_points = sizeof(entry_points) / sizeof(entry_points[0]); + entry_point_fun *entry_point_fun = NULL; + + for (int i = 0; i < num_entry_points; i++) { + if (strcmp(entry_points[i].name, entry_point) == 0) { + entry_point_fun = entry_points[i].fun; + break; + } + } + if (entry_point_fun == NULL) { + fprintf(stderr, + "No entry point '%s'. Select another with --entry-point. Options are:\n", + entry_point); + for (int i = 0; i < num_entry_points; i++) + fprintf(stderr, "%s\n", entry_points[i].name); + return 1; + } + entry_point_fun(ctx); + if (runtime_file != NULL) + fclose(runtime_file); + + char *report = futhark_context_report(ctx); + + fputs(report, stderr); + free(report); + } + futhark_context_free(ctx); + futhark_context_config_free(cfg); + return 0; +} +#ifdef _MSC_VER +#define inline __inline +#endif +#include <string.h> +#include <inttypes.h> +#include <ctype.h> +#include <errno.h> +#include <assert.h> + +// Start of lock.h. + +// A very simple cross-platform implementation of locks. Uses +// pthreads on Unix and some Windows thing there. Futhark's +// host-level code is not multithreaded, but user code may be, so we +// need some mechanism for ensuring atomic access to API functions. +// This is that mechanism. It is not exposed to user code at all, so +// we do not have to worry about name collisions. + +#ifdef _WIN32 + +typedef HANDLE lock_t; + +static void create_lock(lock_t *lock) { + *lock = CreateMutex(NULL, // Default security attributes. + FALSE, // Initially unlocked. + NULL); // Unnamed. +} + +static void lock_lock(lock_t *lock) { + assert(WaitForSingleObject(*lock, INFINITE) == WAIT_OBJECT_0); +} + +static void lock_unlock(lock_t *lock) { + assert(ReleaseMutex(*lock)); +} + +static void free_lock(lock_t *lock) { + CloseHandle(*lock); +} + +#else +// Assuming POSIX + +#include <pthread.h> + +typedef pthread_mutex_t lock_t; + +static void create_lock(lock_t *lock) { + int r = pthread_mutex_init(lock, NULL); + assert(r == 0); +} + +static void lock_lock(lock_t *lock) { + int r = pthread_mutex_lock(lock); + assert(r == 0); +} + +static void lock_unlock(lock_t *lock) { + int r = pthread_mutex_unlock(lock); + assert(r == 0); +} + +static void free_lock(lock_t *lock) { + // Nothing to do for pthreads. + (void)lock; +} + +#endif + +// End of lock.h. + +static inline uint8_t add8(uint8_t x, uint8_t y) +{ + return x + y; +} +static inline uint16_t add16(uint16_t x, uint16_t y) +{ + return x + y; +} +static inline uint32_t add32(uint32_t x, uint32_t y) +{ + return x + y; +} +static inline uint64_t add64(uint64_t x, uint64_t y) +{ + return x + y; +} +static inline uint8_t sub8(uint8_t x, uint8_t y) +{ + return x - y; +} +static inline uint16_t sub16(uint16_t x, uint16_t y) +{ + return x - y; +} +static inline uint32_t sub32(uint32_t x, uint32_t y) +{ + return x - y; +} +static inline uint64_t sub64(uint64_t x, uint64_t y) +{ + return x - y; +} +static inline uint8_t mul8(uint8_t x, uint8_t y) +{ + return x * y; +} +static inline uint16_t mul16(uint16_t x, uint16_t y) +{ + return x * y; +} +static inline uint32_t mul32(uint32_t x, uint32_t y) +{ + return x * y; +} +static inline uint64_t mul64(uint64_t x, uint64_t y) +{ + return x * y; +} +static inline uint8_t udiv8(uint8_t x, uint8_t y) +{ + return x / y; +} +static inline uint16_t udiv16(uint16_t x, uint16_t y) +{ + return x / y; +} +static inline uint32_t udiv32(uint32_t x, uint32_t y) +{ + return x / y; +} +static inline uint64_t udiv64(uint64_t x, uint64_t y) +{ + return x / y; +} +static inline uint8_t udiv_up8(uint8_t x, uint8_t y) +{ + return (x + y - 1) / y; +} +static inline uint16_t udiv_up16(uint16_t x, uint16_t y) +{ + return (x + y - 1) / y; +} +static inline uint32_t udiv_up32(uint32_t x, uint32_t y) +{ + return (x + y - 1) / y; +} +static inline uint64_t udiv_up64(uint64_t x, uint64_t y) +{ + return (x + y - 1) / y; +} +static inline uint8_t umod8(uint8_t x, uint8_t y) +{ + return x % y; +} +static inline uint16_t umod16(uint16_t x, uint16_t y) +{ + return x % y; +} +static inline uint32_t umod32(uint32_t x, uint32_t y) +{ + return x % y; +} +static inline uint64_t umod64(uint64_t x, uint64_t y) +{ + return x % y; +} +static inline uint8_t udiv_safe8(uint8_t x, uint8_t y) +{ + return y == 0 ? 0 : x / y; +} +static inline uint16_t udiv_safe16(uint16_t x, uint16_t y) +{ + return y == 0 ? 0 : x / y; +} +static inline uint32_t udiv_safe32(uint32_t x, uint32_t y) +{ + return y == 0 ? 0 : x / y; +} +static inline uint64_t udiv_safe64(uint64_t x, uint64_t y) +{ + return y == 0 ? 0 : x / y; +} +static inline uint8_t udiv_up_safe8(uint8_t x, uint8_t y) +{ + return y == 0 ? 0 : (x + y - 1) / y; +} +static inline uint16_t udiv_up_safe16(uint16_t x, uint16_t y) +{ + return y == 0 ? 0 : (x + y - 1) / y; +} +static inline uint32_t udiv_up_safe32(uint32_t x, uint32_t y) +{ + return y == 0 ? 0 : (x + y - 1) / y; +} +static inline uint64_t udiv_up_safe64(uint64_t x, uint64_t y) +{ + return y == 0 ? 0 : (x + y - 1) / y; +} +static inline uint8_t umod_safe8(uint8_t x, uint8_t y) +{ + return y == 0 ? 0 : x % y; +} +static inline uint16_t umod_safe16(uint16_t x, uint16_t y) +{ + return y == 0 ? 0 : x % y; +} +static inline uint32_t umod_safe32(uint32_t x, uint32_t y) +{ + return y == 0 ? 0 : x % y; +} +static inline uint64_t umod_safe64(uint64_t x, uint64_t y) +{ + return y == 0 ? 0 : x % y; +} +static inline int8_t sdiv8(int8_t x, int8_t y) +{ + int8_t q = x / y; + int8_t r = x % y; + + return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0); +} +static inline int16_t sdiv16(int16_t x, int16_t y) +{ + int16_t q = x / y; + int16_t r = x % y; + + return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0); +} +static inline int32_t sdiv32(int32_t x, int32_t y) +{ + int32_t q = x / y; + int32_t r = x % y; + + return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0); +} +static inline int64_t sdiv64(int64_t x, int64_t y) +{ + int64_t q = x / y; + int64_t r = x % y; + + return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0); +} +static inline int8_t sdiv_up8(int8_t x, int8_t y) +{ + return sdiv8(x + y - 1, y); +} +static inline int16_t sdiv_up16(int16_t x, int16_t y) +{ + return sdiv16(x + y - 1, y); +} +static inline int32_t sdiv_up32(int32_t x, int32_t y) +{ + return sdiv32(x + y - 1, y); +} +static inline int64_t sdiv_up64(int64_t x, int64_t y) +{ + return sdiv64(x + y - 1, y); +} +static inline int8_t smod8(int8_t x, int8_t y) +{ + int8_t r = x % y; + + return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y); +} +static inline int16_t smod16(int16_t x, int16_t y) +{ + int16_t r = x % y; + + return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y); +} +static inline int32_t smod32(int32_t x, int32_t y) +{ + int32_t r = x % y; + + return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y); +} +static inline int64_t smod64(int64_t x, int64_t y) +{ + int64_t r = x % y; + + return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y); +} +static inline int8_t sdiv_safe8(int8_t x, int8_t y) +{ + return y == 0 ? 0 : sdiv8(x, y); +} +static inline int16_t sdiv_safe16(int16_t x, int16_t y) +{ + return y == 0 ? 0 : sdiv16(x, y); +} +static inline int32_t sdiv_safe32(int32_t x, int32_t y) +{ + return y == 0 ? 0 : sdiv32(x, y); +} +static inline int64_t sdiv_safe64(int64_t x, int64_t y) +{ + return y == 0 ? 0 : sdiv64(x, y); +} +static inline int8_t sdiv_up_safe8(int8_t x, int8_t y) +{ + return sdiv_safe8(x + y - 1, y); +} +static inline int16_t sdiv_up_safe16(int16_t x, int16_t y) +{ + return sdiv_safe16(x + y - 1, y); +} +static inline int32_t sdiv_up_safe32(int32_t x, int32_t y) +{ + return sdiv_safe32(x + y - 1, y); +} +static inline int64_t sdiv_up_safe64(int64_t x, int64_t y) +{ + return sdiv_safe64(x + y - 1, y); +} +static inline int8_t smod_safe8(int8_t x, int8_t y) +{ + return y == 0 ? 0 : smod8(x, y); +} +static inline int16_t smod_safe16(int16_t x, int16_t y) +{ + return y == 0 ? 0 : smod16(x, y); +} +static inline int32_t smod_safe32(int32_t x, int32_t y) +{ + return y == 0 ? 0 : smod32(x, y); +} +static inline int64_t smod_safe64(int64_t x, int64_t y) +{ + return y == 0 ? 0 : smod64(x, y); +} +static inline int8_t squot8(int8_t x, int8_t y) +{ + return x / y; +} +static inline int16_t squot16(int16_t x, int16_t y) +{ + return x / y; +} +static inline int32_t squot32(int32_t x, int32_t y) +{ + return x / y; +} +static inline int64_t squot64(int64_t x, int64_t y) +{ + return x / y; +} +static inline int8_t srem8(int8_t x, int8_t y) +{ + return x % y; +} +static inline int16_t srem16(int16_t x, int16_t y) +{ + return x % y; +} +static inline int32_t srem32(int32_t x, int32_t y) +{ + return x % y; +} +static inline int64_t srem64(int64_t x, int64_t y) +{ + return x % y; +} +static inline int8_t squot_safe8(int8_t x, int8_t y) +{ + return y == 0 ? 0 : x / y; +} +static inline int16_t squot_safe16(int16_t x, int16_t y) +{ + return y == 0 ? 0 : x / y; +} +static inline int32_t squot_safe32(int32_t x, int32_t y) +{ + return y == 0 ? 0 : x / y; +} +static inline int64_t squot_safe64(int64_t x, int64_t y) +{ + return y == 0 ? 0 : x / y; +} +static inline int8_t srem_safe8(int8_t x, int8_t y) +{ + return y == 0 ? 0 : x % y; +} +static inline int16_t srem_safe16(int16_t x, int16_t y) +{ + return y == 0 ? 0 : x % y; +} +static inline int32_t srem_safe32(int32_t x, int32_t y) +{ + return y == 0 ? 0 : x % y; +} +static inline int64_t srem_safe64(int64_t x, int64_t y) +{ + return y == 0 ? 0 : x % y; +} +static inline int8_t smin8(int8_t x, int8_t y) +{ + return x < y ? x : y; +} +static inline int16_t smin16(int16_t x, int16_t y) +{ + return x < y ? x : y; +} +static inline int32_t smin32(int32_t x, int32_t y) +{ + return x < y ? x : y; +} +static inline int64_t smin64(int64_t x, int64_t y) +{ + return x < y ? x : y; +} +static inline uint8_t umin8(uint8_t x, uint8_t y) +{ + return x < y ? x : y; +} +static inline uint16_t umin16(uint16_t x, uint16_t y) +{ + return x < y ? x : y; +} +static inline uint32_t umin32(uint32_t x, uint32_t y) +{ + return x < y ? x : y; +} +static inline uint64_t umin64(uint64_t x, uint64_t y) +{ + return x < y ? x : y; +} +static inline int8_t smax8(int8_t x, int8_t y) +{ + return x < y ? y : x; +} +static inline int16_t smax16(int16_t x, int16_t y) +{ + return x < y ? y : x; +} +static inline int32_t smax32(int32_t x, int32_t y) +{ + return x < y ? y : x; +} +static inline int64_t smax64(int64_t x, int64_t y) +{ + return x < y ? y : x; +} +static inline uint8_t umax8(uint8_t x, uint8_t y) +{ + return x < y ? y : x; +} +static inline uint16_t umax16(uint16_t x, uint16_t y) +{ + return x < y ? y : x; +} +static inline uint32_t umax32(uint32_t x, uint32_t y) +{ + return x < y ? y : x; +} +static inline uint64_t umax64(uint64_t x, uint64_t y) +{ + return x < y ? y : x; +} +static inline uint8_t shl8(uint8_t x, uint8_t y) +{ + return x << y; +} +static inline uint16_t shl16(uint16_t x, uint16_t y) +{ + return x << y; +} +static inline uint32_t shl32(uint32_t x, uint32_t y) +{ + return x << y; +} +static inline uint64_t shl64(uint64_t x, uint64_t y) +{ + return x << y; +} +static inline uint8_t lshr8(uint8_t x, uint8_t y) +{ + return x >> y; +} +static inline uint16_t lshr16(uint16_t x, uint16_t y) +{ + return x >> y; +} +static inline uint32_t lshr32(uint32_t x, uint32_t y) +{ + return x >> y; +} +static inline uint64_t lshr64(uint64_t x, uint64_t y) +{ + return x >> y; +} +static inline int8_t ashr8(int8_t x, int8_t y) +{ + return x >> y; +} +static inline int16_t ashr16(int16_t x, int16_t y) +{ + return x >> y; +} +static inline int32_t ashr32(int32_t x, int32_t y) +{ + return x >> y; +} +static inline int64_t ashr64(int64_t x, int64_t y) +{ + return x >> y; +} +static inline uint8_t and8(uint8_t x, uint8_t y) +{ + return x & y; +} +static inline uint16_t and16(uint16_t x, uint16_t y) +{ + return x & y; +} +static inline uint32_t and32(uint32_t x, uint32_t y) +{ + return x & y; +} +static inline uint64_t and64(uint64_t x, uint64_t y) +{ + return x & y; +} +static inline uint8_t or8(uint8_t x, uint8_t y) +{ + return x | y; +} +static inline uint16_t or16(uint16_t x, uint16_t y) +{ + return x | y; +} +static inline uint32_t or32(uint32_t x, uint32_t y) +{ + return x | y; +} +static inline uint64_t or64(uint64_t x, uint64_t y) +{ + return x | y; +} +static inline uint8_t xor8(uint8_t x, uint8_t y) +{ + return x ^ y; +} +static inline uint16_t xor16(uint16_t x, uint16_t y) +{ + return x ^ y; +} +static inline uint32_t xor32(uint32_t x, uint32_t y) +{ + return x ^ y; +} +static inline uint64_t xor64(uint64_t x, uint64_t y) +{ + return x ^ y; +} +static inline bool ult8(uint8_t x, uint8_t y) +{ + return x < y; +} +static inline bool ult16(uint16_t x, uint16_t y) +{ + return x < y; +} +static inline bool ult32(uint32_t x, uint32_t y) +{ + return x < y; +} +static inline bool ult64(uint64_t x, uint64_t y) +{ + return x < y; +} +static inline bool ule8(uint8_t x, uint8_t y) +{ + return x <= y; +} +static inline bool ule16(uint16_t x, uint16_t y) +{ + return x <= y; +} +static inline bool ule32(uint32_t x, uint32_t y) +{ + return x <= y; +} +static inline bool ule64(uint64_t x, uint64_t y) +{ + return x <= y; +} +static inline bool slt8(int8_t x, int8_t y) +{ + return x < y; +} +static inline bool slt16(int16_t x, int16_t y) +{ + return x < y; +} +static inline bool slt32(int32_t x, int32_t y) +{ + return x < y; +} +static inline bool slt64(int64_t x, int64_t y) +{ + return x < y; +} +static inline bool sle8(int8_t x, int8_t y) +{ + return x <= y; +} +static inline bool sle16(int16_t x, int16_t y) +{ + return x <= y; +} +static inline bool sle32(int32_t x, int32_t y) +{ + return x <= y; +} +static inline bool sle64(int64_t x, int64_t y) +{ + return x <= y; +} +static inline int8_t pow8(int8_t x, int8_t y) +{ + int8_t res = 1, rem = y; + + while (rem != 0) { + if (rem & 1) + res *= x; + rem >>= 1; + x *= x; + } + return res; +} +static inline int16_t pow16(int16_t x, int16_t y) +{ + int16_t res = 1, rem = y; + + while (rem != 0) { + if (rem & 1) + res *= x; + rem >>= 1; + x *= x; + } + return res; +} +static inline int32_t pow32(int32_t x, int32_t y) +{ + int32_t res = 1, rem = y; + + while (rem != 0) { + if (rem & 1) + res *= x; + rem >>= 1; + x *= x; + } + return res; +} +static inline int64_t pow64(int64_t x, int64_t y) +{ + int64_t res = 1, rem = y; + + while (rem != 0) { + if (rem & 1) + res *= x; + rem >>= 1; + x *= x; + } + return res; +} +static inline bool itob_i8_bool(int8_t x) +{ + return x; +} +static inline bool itob_i16_bool(int16_t x) +{ + return x; +} +static inline bool itob_i32_bool(int32_t x) +{ + return x; +} +static inline bool itob_i64_bool(int64_t x) +{ + return x; +} +static inline int8_t btoi_bool_i8(bool x) +{ + return x; +} +static inline int16_t btoi_bool_i16(bool x) +{ + return x; +} +static inline int32_t btoi_bool_i32(bool x) +{ + return x; +} +static inline int64_t btoi_bool_i64(bool x) +{ + return x; +} +#define sext_i8_i8(x) ((int8_t) (int8_t) x) +#define sext_i8_i16(x) ((int16_t) (int8_t) x) +#define sext_i8_i32(x) ((int32_t) (int8_t) x) +#define sext_i8_i64(x) ((int64_t) (int8_t) x) +#define sext_i16_i8(x) ((int8_t) (int16_t) x) +#define sext_i16_i16(x) ((int16_t) (int16_t) x) +#define sext_i16_i32(x) ((int32_t) (int16_t) x) +#define sext_i16_i64(x) ((int64_t) (int16_t) x) +#define sext_i32_i8(x) ((int8_t) (int32_t) x) +#define sext_i32_i16(x) ((int16_t) (int32_t) x) +#define sext_i32_i32(x) ((int32_t) (int32_t) x) +#define sext_i32_i64(x) ((int64_t) (int32_t) x) +#define sext_i64_i8(x) ((int8_t) (int64_t) x) +#define sext_i64_i16(x) ((int16_t) (int64_t) x) +#define sext_i64_i32(x) ((int32_t) (int64_t) x) +#define sext_i64_i64(x) ((int64_t) (int64_t) x) +#define zext_i8_i8(x) ((int8_t) (uint8_t) x) +#define zext_i8_i16(x) ((int16_t) (uint8_t) x) +#define zext_i8_i32(x) ((int32_t) (uint8_t) x) +#define zext_i8_i64(x) ((int64_t) (uint8_t) x) +#define zext_i16_i8(x) ((int8_t) (uint16_t) x) +#define zext_i16_i16(x) ((int16_t) (uint16_t) x) +#define zext_i16_i32(x) ((int32_t) (uint16_t) x) +#define zext_i16_i64(x) ((int64_t) (uint16_t) x) +#define zext_i32_i8(x) ((int8_t) (uint32_t) x) +#define zext_i32_i16(x) ((int16_t) (uint32_t) x) +#define zext_i32_i32(x) ((int32_t) (uint32_t) x) +#define zext_i32_i64(x) ((int64_t) (uint32_t) x) +#define zext_i64_i8(x) ((int8_t) (uint64_t) x) +#define zext_i64_i16(x) ((int16_t) (uint64_t) x) +#define zext_i64_i32(x) ((int32_t) (uint64_t) x) +#define zext_i64_i64(x) ((int64_t) (uint64_t) x) +#if defined(__OPENCL_VERSION__) +static int32_t futrts_popc8(int8_t x) +{ + return popcount(x); +} +static int32_t futrts_popc16(int16_t x) +{ + return popcount(x); +} +static int32_t futrts_popc32(int32_t x) +{ + return popcount(x); +} +static int32_t futrts_popc64(int64_t x) +{ + return popcount(x); +} +#elif defined(__CUDA_ARCH__) +static int32_t futrts_popc8(int8_t x) +{ + return __popc(zext_i8_i32(x)); +} +static int32_t futrts_popc16(int16_t x) +{ + return __popc(zext_i16_i32(x)); +} +static int32_t futrts_popc32(int32_t x) +{ + return __popc(x); +} +static int32_t futrts_popc64(int64_t x) +{ + return __popcll(x); +} +#else +static int32_t futrts_popc8(int8_t x) +{ + int c = 0; + + for (; x; ++c) + x &= x - 1; + return c; +} +static int32_t futrts_popc16(int16_t x) +{ + int c = 0; + + for (; x; ++c) + x &= x - 1; + return c; +} +static int32_t futrts_popc32(int32_t x) +{ + int c = 0; + + for (; x; ++c) + x &= x - 1; + return c; +} +static int32_t futrts_popc64(int64_t x) +{ + int c = 0; + + for (; x; ++c) + x &= x - 1; + return c; +} +#endif +#if defined(__OPENCL_VERSION__) +static uint8_t futrts_mul_hi8(uint8_t a, uint8_t b) +{ + return mul_hi(a, b); +} +static uint16_t futrts_mul_hi16(uint16_t a, uint16_t b) +{ + return mul_hi(a, b); +} +static uint32_t futrts_mul_hi32(uint32_t a, uint32_t b) +{ + return mul_hi(a, b); +} +static uint64_t futrts_mul_hi64(uint64_t a, uint64_t b) +{ + return mul_hi(a, b); +} +#elif defined(__CUDA_ARCH__) +static uint8_t futrts_mul_hi8(uint8_t a, uint8_t b) +{ + uint16_t aa = a; + uint16_t bb = b; + + return aa * bb >> 8; +} +static uint16_t futrts_mul_hi16(uint16_t a, uint16_t b) +{ + uint32_t aa = a; + uint32_t bb = b; + + return aa * bb >> 16; +} +static uint32_t futrts_mul_hi32(uint32_t a, uint32_t b) +{ + return mulhi(a, b); +} +static uint64_t futrts_mul_hi64(uint64_t a, uint64_t b) +{ + return mul64hi(a, b); +} +#else +static uint8_t futrts_mul_hi8(uint8_t a, uint8_t b) +{ + uint16_t aa = a; + uint16_t bb = b; + + return aa * bb >> 8; +} +static uint16_t futrts_mul_hi16(uint16_t a, uint16_t b) +{ + uint32_t aa = a; + uint32_t bb = b; + + return aa * bb >> 16; +} +static uint32_t futrts_mul_hi32(uint32_t a, uint32_t b) +{ + uint64_t aa = a; + uint64_t bb = b; + + return aa * bb >> 32; +} +static uint64_t futrts_mul_hi64(uint64_t a, uint64_t b) +{ + __uint128_t aa = a; + __uint128_t bb = b; + + return aa * bb >> 64; +} +#endif +#if defined(__OPENCL_VERSION__) +static uint8_t futrts_mad_hi8(uint8_t a, uint8_t b, uint8_t c) +{ + return mad_hi(a, b, c); +} +static uint16_t futrts_mad_hi16(uint16_t a, uint16_t b, uint16_t c) +{ + return mad_hi(a, b, c); +} +static uint32_t futrts_mad_hi32(uint32_t a, uint32_t b, uint32_t c) +{ + return mad_hi(a, b, c); +} +static uint64_t futrts_mad_hi64(uint64_t a, uint64_t b, uint64_t c) +{ + return mad_hi(a, b, c); +} +#else +static uint8_t futrts_mad_hi8(uint8_t a, uint8_t b, uint8_t c) +{ + return futrts_mul_hi8(a, b) + c; +} +static uint16_t futrts_mad_hi16(uint16_t a, uint16_t b, uint16_t c) +{ + return futrts_mul_hi16(a, b) + c; +} +static uint32_t futrts_mad_hi32(uint32_t a, uint32_t b, uint32_t c) +{ + return futrts_mul_hi32(a, b) + c; +} +static uint64_t futrts_mad_hi64(uint64_t a, uint64_t b, uint64_t c) +{ + return futrts_mul_hi64(a, b) + c; +} +#endif +#if defined(__OPENCL_VERSION__) +static int32_t futrts_clzz8(int8_t x) +{ + return clz(x); +} +static int32_t futrts_clzz16(int16_t x) +{ + return clz(x); +} +static int32_t futrts_clzz32(int32_t x) +{ + return clz(x); +} +static int32_t futrts_clzz64(int64_t x) +{ + return clz(x); +} +#elif defined(__CUDA_ARCH__) +static int32_t futrts_clzz8(int8_t x) +{ + return __clz(zext_i8_i32(x)) - 24; +} +static int32_t futrts_clzz16(int16_t x) +{ + return __clz(zext_i16_i32(x)) - 16; +} +static int32_t futrts_clzz32(int32_t x) +{ + return __clz(x); +} +static int32_t futrts_clzz64(int64_t x) +{ + return __clzll(x); +} +#else +static int32_t futrts_clzz8(int8_t x) +{ + int n = 0; + int bits = sizeof(x) * 8; + + for (int i = 0; i < bits; i++) { + if (x < 0) + break; + n++; + x <<= 1; + } + return n; +} +static int32_t futrts_clzz16(int16_t x) +{ + int n = 0; + int bits = sizeof(x) * 8; + + for (int i = 0; i < bits; i++) { + if (x < 0) + break; + n++; + x <<= 1; + } + return n; +} +static int32_t futrts_clzz32(int32_t x) +{ + int n = 0; + int bits = sizeof(x) * 8; + + for (int i = 0; i < bits; i++) { + if (x < 0) + break; + n++; + x <<= 1; + } + return n; +} +static int32_t futrts_clzz64(int64_t x) +{ + int n = 0; + int bits = sizeof(x) * 8; + + for (int i = 0; i < bits; i++) { + if (x < 0) + break; + n++; + x <<= 1; + } + return n; +} +#endif +#if defined(__OPENCL_VERSION__) +static int32_t futrts_ctzz8(int8_t x) +{ + int i = 0; + + for (; i < 8 && (x & 1) == 0; i++, x >>= 1) + ; + return i; +} +static int32_t futrts_ctzz16(int16_t x) +{ + int i = 0; + + for (; i < 16 && (x & 1) == 0; i++, x >>= 1) + ; + return i; +} +static int32_t futrts_ctzz32(int32_t x) +{ + int i = 0; + + for (; i < 32 && (x & 1) == 0; i++, x >>= 1) + ; + return i; +} +static int32_t futrts_ctzz64(int64_t x) +{ + int i = 0; + + for (; i < 64 && (x & 1) == 0; i++, x >>= 1) + ; + return i; +} +#elif defined(__CUDA_ARCH__) +static int32_t futrts_ctzz8(int8_t x) +{ + int y = __ffs(x); + + return y == 0 ? 8 : y - 1; +} +static int32_t futrts_ctzz16(int16_t x) +{ + int y = __ffs(x); + + return y == 0 ? 16 : y - 1; +} +static int32_t futrts_ctzz32(int32_t x) +{ + int y = __ffs(x); + + return y == 0 ? 32 : y - 1; +} +static int32_t futrts_ctzz64(int64_t x) +{ + int y = __ffsll(x); + + return y == 0 ? 64 : y - 1; +} +#else +static int32_t futrts_ctzz8(int8_t x) +{ + return x == 0 ? 8 : __builtin_ctz((uint32_t) x); +} +static int32_t futrts_ctzz16(int16_t x) +{ + return x == 0 ? 16 : __builtin_ctz((uint32_t) x); +} +static int32_t futrts_ctzz32(int32_t x) +{ + return x == 0 ? 32 : __builtin_ctz(x); +} +static int32_t futrts_ctzz64(int64_t x) +{ + return x == 0 ? 64 : __builtin_ctzll(x); +} +#endif +static inline float fdiv32(float x, float y) +{ + return x / y; +} +static inline float fadd32(float x, float y) +{ + return x + y; +} +static inline float fsub32(float x, float y) +{ + return x - y; +} +static inline float fmul32(float x, float y) +{ + return x * y; +} +static inline float fmin32(float x, float y) +{ + return fmin(x, y); +} +static inline float fmax32(float x, float y) +{ + return fmax(x, y); +} +static inline float fpow32(float x, float y) +{ + return pow(x, y); +} +static inline bool cmplt32(float x, float y) +{ + return x < y; +} +static inline bool cmple32(float x, float y) +{ + return x <= y; +} +static inline float sitofp_i8_f32(int8_t x) +{ + return (float) x; +} +static inline float sitofp_i16_f32(int16_t x) +{ + return (float) x; +} +static inline float sitofp_i32_f32(int32_t x) +{ + return (float) x; +} +static inline float sitofp_i64_f32(int64_t x) +{ + return (float) x; +} +static inline float uitofp_i8_f32(uint8_t x) +{ + return (float) x; +} +static inline float uitofp_i16_f32(uint16_t x) +{ + return (float) x; +} +static inline float uitofp_i32_f32(uint32_t x) +{ + return (float) x; +} +static inline float uitofp_i64_f32(uint64_t x) +{ + return (float) x; +} +static inline int8_t fptosi_f32_i8(float x) +{ + return (int8_t) x; +} +static inline int16_t fptosi_f32_i16(float x) +{ + return (int16_t) x; +} +static inline int32_t fptosi_f32_i32(float x) +{ + return (int32_t) x; +} +static inline int64_t fptosi_f32_i64(float x) +{ + return (int64_t) x; +} +static inline uint8_t fptoui_f32_i8(float x) +{ + return (uint8_t) x; +} +static inline uint16_t fptoui_f32_i16(float x) +{ + return (uint16_t) x; +} +static inline uint32_t fptoui_f32_i32(float x) +{ + return (uint32_t) x; +} +static inline uint64_t fptoui_f32_i64(float x) +{ + return (uint64_t) x; +} +static inline double fdiv64(double x, double y) +{ + return x / y; +} +static inline double fadd64(double x, double y) +{ + return x + y; +} +static inline double fsub64(double x, double y) +{ + return x - y; +} +static inline double fmul64(double x, double y) +{ + return x * y; +} +static inline double fmin64(double x, double y) +{ + return fmin(x, y); +} +static inline double fmax64(double x, double y) +{ + return fmax(x, y); +} +static inline double fpow64(double x, double y) +{ + return pow(x, y); +} +static inline bool cmplt64(double x, double y) +{ + return x < y; +} +static inline bool cmple64(double x, double y) +{ + return x <= y; +} +static inline double sitofp_i8_f64(int8_t x) +{ + return (double) x; +} +static inline double sitofp_i16_f64(int16_t x) +{ + return (double) x; +} +static inline double sitofp_i32_f64(int32_t x) +{ + return (double) x; +} +static inline double sitofp_i64_f64(int64_t x) +{ + return (double) x; +} +static inline double uitofp_i8_f64(uint8_t x) +{ + return (double) x; +} +static inline double uitofp_i16_f64(uint16_t x) +{ + return (double) x; +} +static inline double uitofp_i32_f64(uint32_t x) +{ + return (double) x; +} +static inline double uitofp_i64_f64(uint64_t x) +{ + return (double) x; +} +static inline int8_t fptosi_f64_i8(double x) +{ + return (int8_t) x; +} +static inline int16_t fptosi_f64_i16(double x) +{ + return (int16_t) x; +} +static inline int32_t fptosi_f64_i32(double x) +{ + return (int32_t) x; +} +static inline int64_t fptosi_f64_i64(double x) +{ + return (int64_t) x; +} +static inline uint8_t fptoui_f64_i8(double x) +{ + return (uint8_t) x; +} +static inline uint16_t fptoui_f64_i16(double x) +{ + return (uint16_t) x; +} +static inline uint32_t fptoui_f64_i32(double x) +{ + return (uint32_t) x; +} +static inline uint64_t fptoui_f64_i64(double x) +{ + return (uint64_t) x; +} +static inline float fpconv_f32_f32(float x) +{ + return (float) x; +} +static inline double fpconv_f32_f64(float x) +{ + return (double) x; +} +static inline float fpconv_f64_f32(double x) +{ + return (float) x; +} +static inline double fpconv_f64_f64(double x) +{ + return (double) x; +} +static inline float futrts_log32(float x) +{ + return log(x); +} +static inline float futrts_log2_32(float x) +{ + return log2(x); +} +static inline float futrts_log10_32(float x) +{ + return log10(x); +} +static inline float futrts_sqrt32(float x) +{ + return sqrt(x); +} +static inline float futrts_exp32(float x) +{ + return exp(x); +} +static inline float futrts_cos32(float x) +{ + return cos(x); +} +static inline float futrts_sin32(float x) +{ + return sin(x); +} +static inline float futrts_tan32(float x) +{ + return tan(x); +} +static inline float futrts_acos32(float x) +{ + return acos(x); +} +static inline float futrts_asin32(float x) +{ + return asin(x); +} +static inline float futrts_atan32(float x) +{ + return atan(x); +} +static inline float futrts_cosh32(float x) +{ + return cosh(x); +} +static inline float futrts_sinh32(float x) +{ + return sinh(x); +} +static inline float futrts_tanh32(float x) +{ + return tanh(x); +} +static inline float futrts_acosh32(float x) +{ + return acosh(x); +} +static inline float futrts_asinh32(float x) +{ + return asinh(x); +} +static inline float futrts_atanh32(float x) +{ + return atanh(x); +} +static inline float futrts_atan2_32(float x, float y) +{ + return atan2(x, y); +} +static inline float futrts_gamma32(float x) +{ + return tgamma(x); +} +static inline float futrts_lgamma32(float x) +{ + return lgamma(x); +} +static inline bool futrts_isnan32(float x) +{ + return isnan(x); +} +static inline bool futrts_isinf32(float x) +{ + return isinf(x); +} +static inline int32_t futrts_to_bits32(float x) +{ + union { + float f; + int32_t t; + } p; + + p.f = x; + return p.t; +} +static inline float futrts_from_bits32(int32_t x) +{ + union { + int32_t f; + float t; + } p; + + p.f = x; + return p.t; +} +#ifdef __OPENCL_VERSION__ +static inline float fmod32(float x, float y) +{ + return fmod(x, y); +} +static inline float futrts_round32(float x) +{ + return rint(x); +} +static inline float futrts_floor32(float x) +{ + return floor(x); +} +static inline float futrts_ceil32(float x) +{ + return ceil(x); +} +static inline float futrts_lerp32(float v0, float v1, float t) +{ + return mix(v0, v1, t); +} +static inline float futrts_mad32(float a, float b, float c) +{ + return mad(a, b, c); +} +static inline float futrts_fma32(float a, float b, float c) +{ + return fma(a, b, c); +} +#else +static inline float fmod32(float x, float y) +{ + return fmodf(x, y); +} +static inline float futrts_round32(float x) +{ + return rintf(x); +} +static inline float futrts_floor32(float x) +{ + return floorf(x); +} +static inline float futrts_ceil32(float x) +{ + return ceilf(x); +} +static inline float futrts_lerp32(float v0, float v1, float t) +{ + return v0 + (v1 - v0) * t; +} +static inline float futrts_mad32(float a, float b, float c) +{ + return a * b + c; +} +static inline float futrts_fma32(float a, float b, float c) +{ + return fmaf(a, b, c); +} +#endif +static inline double futrts_log64(double x) +{ + return log(x); +} +static inline double futrts_log2_64(double x) +{ + return log2(x); +} +static inline double futrts_log10_64(double x) +{ + return log10(x); +} +static inline double futrts_sqrt64(double x) +{ + return sqrt(x); +} +static inline double futrts_exp64(double x) +{ + return exp(x); +} +static inline double futrts_cos64(double x) +{ + return cos(x); +} +static inline double futrts_sin64(double x) +{ + return sin(x); +} +static inline double futrts_tan64(double x) +{ + return tan(x); +} +static inline double futrts_acos64(double x) +{ + return acos(x); +} +static inline double futrts_asin64(double x) +{ + return asin(x); +} +static inline double futrts_atan64(double x) +{ + return atan(x); +} +static inline double futrts_cosh64(double x) +{ + return cosh(x); +} +static inline double futrts_sinh64(double x) +{ + return sinh(x); +} +static inline double futrts_tanh64(double x) +{ + return tanh(x); +} +static inline double futrts_acosh64(double x) +{ + return acosh(x); +} +static inline double futrts_asinh64(double x) +{ + return asinh(x); +} +static inline double futrts_atanh64(double x) +{ + return atanh(x); +} +static inline double futrts_atan2_64(double x, double y) +{ + return atan2(x, y); +} +static inline double futrts_gamma64(double x) +{ + return tgamma(x); +} +static inline double futrts_lgamma64(double x) +{ + return lgamma(x); +} +static inline double futrts_fma64(double a, double b, double c) +{ + return fma(a, b, c); +} +static inline double futrts_round64(double x) +{ + return rint(x); +} +static inline double futrts_ceil64(double x) +{ + return ceil(x); +} +static inline double futrts_floor64(double x) +{ + return floor(x); +} +static inline bool futrts_isnan64(double x) +{ + return isnan(x); +} +static inline bool futrts_isinf64(double x) +{ + return isinf(x); +} +static inline int64_t futrts_to_bits64(double x) +{ + union { + double f; + int64_t t; + } p; + + p.f = x; + return p.t; +} +static inline double futrts_from_bits64(int64_t x) +{ + union { + int64_t f; + double t; + } p; + + p.f = x; + return p.t; +} +static inline double fmod64(double x, double y) +{ + return fmod(x, y); +} +#ifdef __OPENCL_VERSION__ +static inline double futrts_lerp64(double v0, double v1, double t) +{ + return mix(v0, v1, t); +} +static inline double futrts_mad64(double a, double b, double c) +{ + return mad(a, b, c); +} +#else +static inline double futrts_lerp64(double v0, double v1, double t) +{ + return v0 + (v1 - v0) * t; +} +static inline double futrts_mad64(double a, double b, double c) +{ + return a * b + c; +} +#endif +static int init_constants(struct futhark_context *); +static int free_constants(struct futhark_context *); +struct memblock { + int *references; + char *mem; + int64_t size; + const char *desc; +} ; +struct futhark_context_config { + int debugging; +} ; +struct futhark_context_config *futhark_context_config_new(void) +{ + struct futhark_context_config *cfg = + (struct futhark_context_config *) malloc(sizeof(struct futhark_context_config)); + + if (cfg == NULL) + return NULL; + cfg->debugging = 0; + return cfg; +} +void futhark_context_config_free(struct futhark_context_config *cfg) +{ + free(cfg); +} +void futhark_context_config_set_debugging(struct futhark_context_config *cfg, + int detail) +{ + cfg->debugging = detail; +} +void futhark_context_config_set_logging(struct futhark_context_config *cfg, + int detail) +{ + /* Does nothing for this backend. */ + (void) cfg; + (void) detail; +} +struct futhark_context { + int detail_memory; + int debugging; + int profiling; + lock_t lock; + char *error; + int profiling_paused; + int64_t peak_mem_usage_default; + int64_t cur_mem_usage_default; + struct { + int dummy; + } constants; +} ; +struct futhark_context *futhark_context_new(struct futhark_context_config *cfg) +{ + struct futhark_context *ctx = + (struct futhark_context *) malloc(sizeof(struct futhark_context)); + + if (ctx == NULL) + return NULL; + ctx->detail_memory = cfg->debugging; + ctx->debugging = cfg->debugging; + ctx->profiling = cfg->debugging; + ctx->error = NULL; + create_lock(&ctx->lock); + ctx->peak_mem_usage_default = 0; + ctx->cur_mem_usage_default = 0; + init_constants(ctx); + return ctx; +} +void futhark_context_free(struct futhark_context *ctx) +{ + free_constants(ctx); + free_lock(&ctx->lock); + free(ctx); +} +int futhark_context_sync(struct futhark_context *ctx) +{ + (void) ctx; + return 0; +} +int futhark_context_clear_caches(struct futhark_context *ctx) +{ + (void) ctx; + return 0; +} +static int memblock_unref(struct futhark_context *ctx, struct memblock *block, + const char *desc) +{ + if (block->references != NULL) { + *block->references -= 1; + if (ctx->detail_memory) + fprintf(stderr, + "Unreferencing block %s (allocated as %s) in %s: %d references remaining.\n", + desc, block->desc, "default space", *block->references); + if (*block->references == 0) { + ctx->cur_mem_usage_default -= block->size; + free(block->mem); + free(block->references); + if (ctx->detail_memory) + fprintf(stderr, + "%lld bytes freed (now allocated: %lld bytes)\n", + (long long) block->size, + (long long) ctx->cur_mem_usage_default); + } + block->references = NULL; + } + return 0; +} +static int memblock_alloc(struct futhark_context *ctx, struct memblock *block, + int64_t size, const char *desc) +{ + if (size < 0) + futhark_panic(1, + "Negative allocation of %lld bytes attempted for %s in %s.\n", + (long long) size, desc, "default space", + ctx->cur_mem_usage_default); + + int ret = memblock_unref(ctx, block, desc); + + ctx->cur_mem_usage_default += size; + if (ctx->detail_memory) + fprintf(stderr, + "Allocating %lld bytes for %s in %s (then allocated: %lld bytes)", + (long long) size, desc, "default space", + (long long) ctx->cur_mem_usage_default); + if (ctx->cur_mem_usage_default > ctx->peak_mem_usage_default) { + ctx->peak_mem_usage_default = ctx->cur_mem_usage_default; + if (ctx->detail_memory) + fprintf(stderr, " (new peak).\n"); + } else if (ctx->detail_memory) + fprintf(stderr, ".\n"); + block->mem = (char *) malloc(size); + block->references = (int *) malloc(sizeof(int)); + *block->references = 1; + block->size = size; + block->desc = desc; + return ret; +} +static int memblock_set(struct futhark_context *ctx, struct memblock *lhs, + struct memblock *rhs, const char *lhs_desc) +{ + int ret = memblock_unref(ctx, lhs, lhs_desc); + + if (rhs->references != NULL) + (*rhs->references)++; + *lhs = *rhs; + return ret; +} +char *futhark_context_report(struct futhark_context *ctx) +{ + struct str_builder builder; + + str_builder_init(&builder); + if (ctx->detail_memory || ctx->profiling) { + { } + } + if (ctx->profiling) { } + return builder.str; +} +char *futhark_context_get_error(struct futhark_context *ctx) +{ + char *error = ctx->error; + + ctx->error = NULL; + return error; +} +void futhark_context_pause_profiling(struct futhark_context *ctx) +{ + ctx->profiling_paused = 1; +} +void futhark_context_unpause_profiling(struct futhark_context *ctx) +{ + ctx->profiling_paused = 0; +} +static int init_constants(struct futhark_context *ctx) +{ + (void) ctx; + + int err = 0; + + + cleanup: + return err; +} +static int free_constants(struct futhark_context *ctx) +{ + (void) ctx; + return 0; +} diff --git a/presentations/pasc/code/gfx.c b/presentations/pasc/code/gfx.c new file mode 100644 index 0000000000000000000000000000000000000000..e5bc36e4d2eeaae679a9b4c96de5200c43effac5 --- /dev/null +++ b/presentations/pasc/code/gfx.c @@ -0,0 +1,98 @@ +#ifdef PTK_SDL + +/// @file gfx.c +/// @author Florent Gluck +/// @date November 6, 2016 +/// Helper routines to render pixels in fullscreen graphic mode. +/// Uses the SDL2 library. + +#include "gfx.h" + +/// Create a fullscreen graphic window. +/// @param title Title of the window. +/// @param width Width of the window in pixels. +/// @param height Height of the window in pixels. +/// @return a pointer to the graphic context or NULL if it failed. +struct gfx_context_t* gfx_create(char *title, uint width, uint height) { + if (SDL_Init(SDL_INIT_VIDEO) != 0) goto error; + SDL_Window *window = SDL_CreateWindow(title, SDL_WINDOWPOS_CENTERED, + SDL_WINDOWPOS_CENTERED, width, height, SDL_WINDOW_RESIZABLE); + SDL_Renderer *renderer = SDL_CreateRenderer(window, -1, 0); + SDL_Texture *texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888, + SDL_TEXTUREACCESS_STREAMING, width, height); + uint32_t *pixels = malloc(width*height*sizeof(uint32_t)); + struct gfx_context_t *ctxt = malloc(sizeof(struct gfx_context_t)); + + if (!window || !renderer || !texture || !pixels || !ctxt) goto error; + + ctxt->renderer = renderer; + ctxt->texture = texture; + ctxt->window = window; + ctxt->width = width; + ctxt->height = height; + ctxt->pixels = pixels; + + SDL_ShowCursor(SDL_DISABLE); + gfx_clear(ctxt, COLOR_BLACK); + return ctxt; + +error: + return NULL; +} + +/// Draw a pixel in the specified graphic context. +/// @param ctxt Graphic context where the pixel is to be drawn. +/// @param x X coordinate of the pixel. +/// @param y Y coordinate of the pixel. +/// @param color Color of the pixel. +void gfx_putpixel(struct gfx_context_t *ctxt, int x, int y, uint32_t color) { + if (x < ctxt->width && y < ctxt->height) + ctxt->pixels[ctxt->width*y+x] = color; +} + +/// Clear the specified graphic context. +/// @param ctxt Graphic context to clear. +/// @param color Color to use. +void gfx_clear(struct gfx_context_t *ctxt, uint32_t color) { + int n = ctxt->width*ctxt->height; + while (n) + ctxt->pixels[--n] = color; +} + +/// Display the graphic context. +/// @param ctxt Graphic context to clear. +void gfx_present(struct gfx_context_t *ctxt) { + SDL_UpdateTexture(ctxt->texture, NULL, ctxt->pixels, ctxt->width*sizeof(uint32_t)); + SDL_RenderCopy(ctxt->renderer, ctxt->texture, NULL, NULL); + SDL_RenderPresent(ctxt->renderer); +} + +/// Destroy a graphic window. +/// @param ctxt Graphic context of the window to close. +void gfx_destroy(struct gfx_context_t *ctxt) { + SDL_ShowCursor(SDL_ENABLE); + SDL_DestroyTexture(ctxt->texture); + SDL_DestroyRenderer(ctxt->renderer); + SDL_DestroyWindow(ctxt->window); + free(ctxt->pixels); + ctxt->texture = NULL; + ctxt->renderer = NULL; + ctxt->window = NULL; + ctxt->pixels = NULL; + SDL_Quit(); + free(ctxt); +} + +/// If a key was pressed, returns its key code (non blocking call). +/// List of key codes: https://wiki.libsdl.org/SDL_Keycode +/// SDL_PumpEvents() must be called before. +/// @return 0 if escape was not pressed. +SDL_Keycode gfx_keypressed() { + const Uint8 *state = SDL_GetKeyboardState(NULL); + if (state && state[SDL_SCANCODE_ESCAPE]) { + return SDLK_ESCAPE; + } + return 0; +} + +#endif \ No newline at end of file diff --git a/presentations/pasc/code/gfx.h b/presentations/pasc/code/gfx.h new file mode 100644 index 0000000000000000000000000000000000000000..fea1f9688660a3c129ac9a2a944eec4a048acd79 --- /dev/null +++ b/presentations/pasc/code/gfx.h @@ -0,0 +1,42 @@ +#ifdef PTK_SDL + +#ifndef _GFX_H_ +#define _GFX_H_ + +#include <stdint.h> +#include <SDL2/SDL.h> + +#define MAKE_COLOR(r,g,b) ((uint32_t)b|((uint32_t)g<<8)|((uint32_t)r<<16)) + +#define COLOR_BLACK 0x00000000 +#define COLOR_RED 0x00FF0000 +#define COLOR_GREEN 0x0000FF00 +#define COLOR_BLUE 0x000000FF +#define COLOR_WHITE 0x00FFFFFF +#define COLOR_YELLOW 0x00FFFF00 + +#define MAXCOLOR 255 + +typedef unsigned int uint; +typedef unsigned long ulong; +typedef unsigned char uchar; + +struct gfx_context_t { + SDL_Window *window; + SDL_Renderer *renderer; + SDL_Texture *texture; + uint32_t *pixels; + int width; + int height; +}; + +extern void gfx_putpixel(struct gfx_context_t *ctxt, int x, int y, uint32_t color); +extern void gfx_clear(struct gfx_context_t *ctxt, uint32_t color); +extern struct gfx_context_t* gfx_create(char *text, uint width, uint height); +extern void gfx_destroy(struct gfx_context_t *ctxt); +extern void gfx_present(struct gfx_context_t *ctxt); +extern SDL_Keycode gfx_keypressed(); + +#endif + +#endif diff --git a/presentations/pasc/code/lbm.c b/presentations/pasc/code/lbm.c new file mode 100644 index 0000000000000000000000000000000000000000..cba3802ee21ed156dbe9d24a2841d47935770481 --- /dev/null +++ b/presentations/pasc/code/lbm.c @@ -0,0 +1,337 @@ +#include "liblbm.h" +#include "gfx.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <time.h> +#include <float.h> +#include <math.h> + +#define q 9 +#define d 2 + +typedef float T; + +const float w[q] = {4.0 / 9.0, 1.0 / 36.0, 1.0 / 9.0, 1.0 / 36.0, 1.0 / 9.0, 1.0 / 36.0, 1.0 / 9.0, 1.0 / 36.0, 1.0 / 9.0}; +const int c[d][q] = { {0, -1, -1, -1, 0, +1, +1, +1, 0}, + {0, +1, 0, -1, -1, -1, 0, +1,+1} }; + +/** + * Print how the program is supposed to be called + */ +void usage() { + printf("%s\n%s\n%s\n%s\n%s\n", "\nusage:\n ./lbm <nx> <ny> <tau> <iter>", + "where:", " <nx> and <ny> are the mesh dimensions ( >= 10),", + " <tau> is tau ( 1/2 < tau <= 2 )", + " <iter> is the number of iterations"); +} + +float compute_max(float **field, int nx, int ny) { + float max = FLT_MIN; + for(int ix = 0; ix < nx; ++ix) { + for(int iy = 0; iy < ny; ++iy){ + if (field[ix][iy] > max) { + max = field[ix][iy]; + } + } + } + return max; +} + +float compute_min(float **field, int nx, int ny) { + float min = FLT_MAX; + for(int ix = 0; ix < nx; ++ix) { + for(int iy = 0; iy < ny; ++iy){ + if (field[ix][iy] < min) { + min = field[ix][iy]; + } + } + } + return min; +} + +float convert_to_zero_one(float num, float min, float max) { + return (num - min) / (max - min); +} + + +float ***allocate_3d(int nx, int ny, int nz) { + float ***f = (float ***)malloc(nx * sizeof(float **)); + for (int ix = 0; ix < nx; ++ix) { + f[ix] = malloc(ny * sizeof(float *)); + for (int iy = 0; iy < ny; ++iy) { + f[ix][iy] = malloc(nz * sizeof(float)); + } + } + return f; +} + +float compute_feq(float w, const int c[d], float rho, float u[d]) { + float c_u = c[0] * u[0]; + float u_sqr = u[0] * u[0]; + for (int id = 1; id < d; ++id) { + c_u += c[id] * u[id]; + u_sqr += u[id] * u[id]; + } + return w * rho * (1.0 + 3.0 * c_u + 4.5 * c_u * c_u - 1.5 * u_sqr); +} + + +float ***allocate_and_ini_at_equilibrium(float rho, float u[d], int nx, int ny) { + float ***f = allocate_3d(nx, ny, q); + for (int ix = 0; ix < nx; ++ix) { + for (int iy = 0; iy < ny; ++iy) { + for (int ipop = 0; ipop < q; ++ipop) { + f[ix][iy][ipop] = compute_feq(w[ipop], c[ipop], rho, u); + } + } + } + return f; +} + +float *allocate_and_ini_at_equilibrium_1d(float rho, float u[d], int nx, int ny) { + float *f = (float *)malloc(nx * ny * q * sizeof(float)); + int i = 0; + for (int ix = 0; ix < nx; ++ix) { + for (int iy = 0; iy < ny; ++iy) { + float rho_ini = rho; + if (ix == nx / 4 && iy == ny / 4) { + rho_ini = 1.1; + } + for (int ipop = 0; ipop < q; ++ipop) { + f[i] = compute_feq(w[ipop], c[ipop], rho_ini, u); + ++i; + } + } + } + return f; +} + +void deallocate_3d(float ***f, int nx, int ny) { + for (int ix = 0; ix < nx; ++ix) { + for (int iy = 0; iy < ny; ++iy) { + free(f[ix][iy]); + } + free(f[ix]); + } + free(f); +} + +float **allocate_2d(int nx, int ny) { + float **f = (float **)malloc(nx * sizeof(float *)); + for (int ix = 0; ix < nx; ++ix) { + f[ix] = malloc(ny * sizeof(float)); + } + return f; +} + +void deallocate_2d(float **f, int nx) { + for (int ix = 0; ix < nx; ++ix) { + free(f[ix]); + } + free(f); +} + +float ***reshape_float_3d(int nx, int ny, int nz, struct futhark_f32_2d *f_in, struct futhark_context *ctx) { + float *data = malloc(nx * ny * nz * sizeof(float)); + futhark_values_f32_2d(ctx, f_in, data); + + float ***reshaped = allocate_3d(nx, ny, nz); + + for (int ix = 0; ix < nx; ++ix) { + for (int iy = 0; iy < ny; ++iy) { + for (int iz = 0; iz < nz; ++iz) { + reshaped[ix][iy][iz] = data[iy + ny*(ix + nx*iz)]; + } + } + } + + return reshaped; +} + +float ***compute_velocity(float ***f, int nx, int ny) { + float ***vel = allocate_3d(nx, ny, d); + + for (int ix = 0; ix < nx; ++ix) { + for (int iy = 0; iy < ny; ++iy) { + float u[d] = {0.0, 0.0}; + float rho = 0.0; + for (int ipop = 0; ipop < q; ++ipop) { + rho += f[ix][iy][ipop]; + for (int id = 0; id < d; ++id) { + u[id] += f[ix][iy][ipop] * c[id][ipop]; + } + } + for (int id = 0; id < d; ++id) { + u[id] /= rho; + vel[ix][iy][id] = u[id]; + } + } + } + return vel; +} + +float **compute_density(float ***f, int nx, int ny) { + float **density = allocate_2d(nx, ny); + + for (int ix = 0; ix < nx; ++ix) { + for (int iy = 0; iy < ny; ++iy) { + float rho = 0.0; + for (int ipop = 0; ipop < q; ++ipop) { + rho += f[ix][iy][ipop]; + } + density[ix][iy] = rho; + } + } + return density; +} + +float **compute_norm(float ***f, int nx, int ny, int nz) { + float **norm = allocate_2d(nx, ny); + + for (int ix = 0; ix < nx; ++ix) { + for (int iy = 0; iy < ny; ++iy) { + float n = 0.0; + for (int id = 0; id < nz; ++id) { + n += f[ix][iy][id] * f[ix][iy][id]; + } + norm[ix][iy] = sqrt(n); + } + } + + return norm; +} + +/** + * Ecriture du tableau des vitesses dans un fichier + **/ +void write_file_3d(float ***data, const int32_t nx, const int32_t ny, const int32_t nz, int i) { + // Create filename + char integer_string[10]; + sprintf(integer_string, "%d", i); + char *filename = strcat(integer_string, ".txt"); + + // Open file + FILE *fp; + if ((fp = fopen(filename, "w")) == NULL) { + printf("Error ! opening file"); + exit(EXIT_FAILURE); + } + + // Fill file + for (int32_t x = 0; x < nx; x++) { + for (int32_t y = 0; y < ny; y++) { + for (int32_t z = 0; z < nz; z++) { + fprintf(fp, "%f ", data[x][y][z]); + } + } + } + + fclose(fp); +} + +#ifdef PTK_SDL +void show_pixels(struct gfx_context_t* context, int nx, int ny, float **pixels) { + gfx_clear(context, COLOR_BLACK); + + float min = compute_min(pixels, nx, ny); + float max = compute_max(pixels, nx, ny); + + for(int ix = 0; ix < nx; ++ix) { + for(int iy = 0; iy < ny; ++iy){ + int px = (int)(convert_to_zero_one(pixels[ix][iy], min, max) * MAXCOLOR); + + uint32_t color = MAKE_COLOR(px, px, px); + + gfx_putpixel(context, ix, iy, color); + } + } + gfx_present(context); +} +#endif + + +int main(int argc, char **argv) { + + // Mauvais nombre d'arguments + if (argc != 5) { + printf("Incorrect argument number\n"); + usage(); + return EXIT_FAILURE; + } + struct futhark_context_config *cfg = futhark_context_config_new(); + struct futhark_context *ctx = futhark_context_new(cfg); + + const int32_t nx = atoi(argv[1]); + const int32_t ny = atoi(argv[2]); + const float tau = atof(argv[3]); + const int iter = atoi(argv[4]); + +#ifdef PTK_SDL + struct gfx_context_t* context = gfx_create("Porous medium", nx, ny); + if (!context) { + fprintf(stderr, "Graphic mode initialization failed!\n"); + EXIT_FAILURE; + } + SDL_Keycode key_pressed = 0; // escape keyy needed +#endif + + // Norme des vitesses en chaque point du maillage (struct futhark et c) + float u[d] = {0.0, 0.0}; + float *data = allocate_and_ini_at_equilibrium_1d(1.0, u, nx, ny); + + struct futhark_f32_2d *f_1 = futhark_new_f32_2d(ctx, + data, nx*ny, q); + + struct futhark_f32_2d *ff; + + struct timespec start, finish; + clock_gettime(CLOCK_MONOTONIC, &start); + for (int it = 0; it < iter; ++it) { + futhark_entry_main(ctx, &f_1, nx, ny, 9, f_1, tau, 1); + } + clock_gettime(CLOCK_MONOTONIC, &finish); + float elapsed = (finish.tv_sec - start.tv_sec) * 1e6; + elapsed += (finish.tv_nsec - start.tv_nsec) / 1e3; + printf("elapsed microseconds = %f\n", elapsed); + printf("approximative MLUSP = %f\n", (float)nx * (float)ny * (float)iter / elapsed); + + struct futhark_f32_2d *fout = f_1; + // if (f_1 == NULL) { + // fout = f_2; + // } + int64_t *shape = futhark_shape_f32_2d(ctx, fout); + assert(nx*ny == shape[0] && q == shape[1]); + + float ***f = reshape_float_3d(nx, ny, q, fout, ctx); + float **density = compute_density(f, nx, ny); + float ***vel = compute_velocity(f, nx, ny); + float **vel_norm = compute_norm(vel, nx, ny, d); + // write_file_3d(f, nx, ny, q, iter); +#ifdef PTK_SDL + while(1) { + SDL_PumpEvents(); + + key_pressed = gfx_keypressed(); + show_pixels(context, nx, ny, density); + if (key_pressed == SDLK_ESCAPE) { + break; + } + } + gfx_destroy(context); +#endif + + + deallocate_3d(f, nx, ny); + deallocate_3d(vel, nx, ny); + deallocate_2d(vel_norm, nx); + deallocate_2d(density, nx); + + free(fout); + free(data); + + + + return EXIT_SUCCESS; +} diff --git a/presentations/pasc/code/lbm3d.c b/presentations/pasc/code/lbm3d.c new file mode 100644 index 0000000000000000000000000000000000000000..1ebd1e1fdbcee33d520f47ddd74baa08f1082800 --- /dev/null +++ b/presentations/pasc/code/lbm3d.c @@ -0,0 +1,102 @@ +#include "liblbm.h" +#include "c_interface/includes.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <time.h> +#include <float.h> +#include <math.h> + +/** + * Print how the program is supposed to be called + */ +void usage() { + printf("%s\n%s\n%s\n%s\n%s\n%s\n", "\nusage:\n ./lbm <n> <Re> <max_t> <prefix>", + "where:", " <n> is the mesh dimension in each spatial direction ( >= 10),", + "\t<Re> is the Reynolds number", + "\t<max_t> is maximum simulation time in physical units.", + "\t<prefix> is the directory name where the output will be written."); +} + +int main(int argc, char **argv) { + + // Mauvais nombre d'arguments + if (argc != 5) { + printf("Incorrect argument number\n"); + usage(); + return EXIT_FAILURE; + } + struct futhark_context_config *cfg = futhark_context_config_new(); + struct futhark_context *ctx = futhark_context_new(cfg); + + const fint n = atoi(argv[1]); + const T Re = atof(argv[2]); + const T end_time = atof(argv[3]); + const char *prefix = argv[4]; + + check_dir(prefix); + + const T log_t = 1.0 / 24.0; + const T u_max = 0.05; + + + units_converter units = units_converter_create_inertial(Re * 2 * pi, 2 * pi, n, 1.0, u_max, 1.0, 1.0, 1.0); + units_converter_print(units); + + const fint iter = (fint)(end_time / units.delta_t + 0.5); + + // Norme des vitesses en chaque point du maillage (struct futhark et c) + T rho = 1.0; + tensor_field lattice = lbm_allocate_and_ini_at_equilibrium_kida(rho, u_max, n, n, n, 27); + + struct futhark_f32_4d *f = tensor_field_to_futhark_f32_4d(lattice, ctx); + + scalar_field density = lbm_compute_density(lattice); + tensor_field velocity = lbm_compute_velocity(lattice); + + struct timespec start, finish; + fint delta_iter = max((fint)(log_t / units.delta_t + 0.5), 1); + clock_gettime(CLOCK_MONOTONIC, &start); + fint i = 0; + for (fint it = 0; it < iter; it += delta_iter) { + // { + // tensor_field_from_futhark_f32_4d_inplace(f, ctx, lattice); + // lbm_compute_density_inplace(lattice, density); + // lbm_compute_velocity_inplace(lattice, velocity); + + // char *fname = vtk_create_fname(prefix, "out", it); + + // vtk_file vtk = vtk_file_create_default(fname, box_create(0, n-1, 0, n-1, 0, n-1), 1.0); + // vtk_file_write_scalar_field(&vtk, density, "density"); + // vtk_file_write_tensor_field(&vtk, velocity, "velocity"); + // vtk_file_close(&vtk); + + // free(fname); + // } + + f = lbm_collide_and_stream(ctx, f, units.omega, delta_iter); + i += 1; + } + + futhark_context_sync(ctx); + clock_gettime(CLOCK_MONOTONIC, &finish); + T elapsed = (finish.tv_sec - start.tv_sec) * 1e6; + elapsed += (finish.tv_nsec - start.tv_nsec) / 1e3; + printf("elapsed microseconds = %f\n", elapsed); + printf("approximative MLUSP = %f\n", (T)n * (T)n * (T)n * (T)i * (T)delta_iter / elapsed); + + + + tensor_field_deallocate(lattice); + tensor_field_deallocate(velocity); + scalar_field_deallocate(density); + + // futhark_free_f32_4d(ctx, f); + futhark_free_f32_4d(ctx, f); + + futhark_context_config_free(cfg); + futhark_context_free(ctx); + + return EXIT_SUCCESS; +} diff --git a/presentations/pasc/code/liblbm.fut b/presentations/pasc/code/liblbm.fut new file mode 100644 index 0000000000000000000000000000000000000000..ea3c58b752e76969ab8870c815887ceea8586254 --- /dev/null +++ b/presentations/pasc/code/liblbm.fut @@ -0,0 +1,111 @@ +-- Program for Lattice-Boltzmann in Futhark +-- Arrays everywhere + +-- Two useful functions for later +let tabulate_4d 'a (p: i64) (n: i64) (m: i64) (o: i64) (f: i64 -> i64 -> i64 -> i64 -> a): *[p][n][m][o]a = + map1 (f >-> tabulate_3d n m o) (iota p) + +let dotprod (xs: []f32) (ys: []f32): f32 = + reduce (+) 0 (map (\(x, y) -> x * y) (zip xs ys)) + +------------------------------------ +------------ CONSTANTS ------------- +------------------------------------ + +module d3q27 = { + + let c: [][]f32 = [ + [ 0, 0, 0], + + [-1, 0, 0], [ 0,-1, 0], [ 0, 0,-1], + [-1,-1, 0], [-1, 1, 0], [-1, 0,-1], + [-1, 0, 1], [ 0,-1,-1], [ 0,-1, 1], + [-1,-1,-1], [-1,-1, 1], [-1, 1,-1], [-1, 1, 1], + + [ 1, 0, 0], [ 0, 1, 0], [ 0, 0, 1], + [ 1, 1, 0], [ 1,-1, 0], [ 1, 0, 1], + [ 1, 0,-1], [ 0, 1, 1], [ 0, 1,-1], + [ 1, 1, 1], [ 1, 1,-1], [ 1,-1, 1], [ 1,-1,-1] ] + + let w: []f32 = [ + 8.0/27, + 2.0/27, 2.0/27, 2.0/27, + 1.0/54, 1.0/54, 1.0/54, + 1.0/54, 1.0/54, 1.0/54, + 1.0/216, 1.0/216, 1.0/216, 1.0/216, + 2.0/27, 2.0/27, 2.0/27, + 1.0/54, 1.0/54, 1.0/54, + 1.0/54, 1.0/54, 1.0/54, + 1.0/216, 1.0/216, 1.0/216, 1.0/216 + ] +} + +let compute_rho [nx][ny][nz][q] (f: [nx][ny][nz][q]f32): [nx][ny][nz]f32 = + map(\fx -> + map(\fxy -> + map(\fxyz -> + reduce (+) 0 fxyz + ) fxy + ) fx + ) f + +let compute_j [nx][ny][nz][q][d] (f: [nx][ny][nz][q]f32)(c: [q][d]f32): [nx][ny][nz][d]f32 = + map(\fx -> + map(\fxy -> + map(\fxyz -> + map(\ci -> + dotprod ci fxyz + ) (transpose c) -- intrinsic + ) fxy + ) fx + ) f + +let compute_feq [nx][ny][nz][q][d] (w: [q]f32) (c: [q][d]f32)(rho: [nx][ny][nz]f32)(j: [nx][ny][nz][d]f32): [nx][ny][nz][q]f32 = + map2(\rho_x j_x -> + map2(\rho_xy j_xy -> + map2(\rho_xyz j_xyz -> + let u = map(\ji -> ji / rho_xyz) j_xyz + let u_sqr = dotprod u u + + in map2(\wi ci -> + let c_u = dotprod ci u + in wi * rho_xyz* (1 + 3*c_u + 4.5 * c_u * c_u - 1.5 * u_sqr) + ) w c + ) rho_xy j_xy + )rho_x j_x + ) rho j + +let collision [nx][ny][nz][q] (f: [nx][ny][nz][q]f32)(feq: [nx][ny][nz][q]f32) (omega: f32): [nx][ny][nz][q]f32 = + map2 (\fx feqx -> + map2(\fxy feqxy -> + map2(\fxyz feqxyz -> + map2(\fxyzi feqxyzi -> + (1-omega) * fxyzi + omega * feqxyzi + ) fxyz feqxyz + ) fxy feqxy + ) fx feqx + ) f feq + +let collide [nx][ny][nz][q][d] (f: [nx][ny][nz][q]f32) (omega: f32) (w: [q]f32) (c: [q][d]f32): [nx][ny][nz][q]f32 = + let rho = compute_rho f + let j = compute_j f c + let feq = compute_feq w c rho j + + in collision f feq omega + + +let streaming [nx][ny][nz][q][d] (f: [nx][ny][nz][q]f32) (c: [q][d]f32): [nx][ny][nz][q]f32 = + tabulate_4d nx ny nz q (\x y z i -> + let next_x = (x - (i64.f32 c[i,0]) + nx ) % nx + let next_y = (y - (i64.f32 c[i,1]) + ny ) % ny + let next_z = (z - (i64.f32 c[i,2]) + nz ) % nz + + in f[next_x, next_y, next_z, i] + ) + +entry main [nx][ny][nz] (fin: [nx][ny][nz][27]f32)(omega: f32) (max_t: i64): [nx][ny][nz][27]f32 = + let f = loop f = fin for _i < max_t do + streaming (collide f omega (copy d3q27.w) (copy d3q27.c)) (copy d3q27.c) + in f + + diff --git a/presentations/pasc/default.latex b/presentations/pasc/default.latex new file mode 100644 index 0000000000000000000000000000000000000000..4b9e09c2e796dfc683f4b0cbd7b9ee40fd640dd9 --- /dev/null +++ b/presentations/pasc/default.latex @@ -0,0 +1,288 @@ +\documentclass[$if(fontsize)$$fontsize$,$endif$$if(lang)$$babel-lang$,$endif$$if(papersize)$$papersize$paper,$endif$$for(classoption)$$classoption$$sep$,$endfor$]{$documentclass$} +$if(beamerarticle)$ +\usepackage{beamerarticle} % needs to be loaded first +$endif$ +$if(fontfamily)$ +\usepackage[$for(fontfamilyoptions)$$fontfamilyoptions$$sep$,$endfor$]{$fontfamily$} +$else$ +\usepackage{lmodern} +$endif$ +$if(linestretch)$ +\usepackage{setspace} +\setstretch{$linestretch$} +$endif$ +\usepackage{amssymb,amsmath,bm} +\usepackage{ifxetex,ifluatex} +\usepackage{fixltx2e} % provides \textsubscript +\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex + \usepackage[$if(fontenc)$$fontenc$$else$T1$endif$]{fontenc} + \usepackage[utf8]{inputenc} +$if(euro)$ + \usepackage{eurosym} +$endif$ +\else % if luatex or xelatex +$if(mathspec)$ + \ifxetex + \usepackage{mathspec} + \else + \usepackage{unicode-math} + \fi +$else$ + \usepackage{unicode-math} +$endif$ + \defaultfontfeatures{Ligatures=TeX,Scale=MatchLowercase} +$for(fontfamilies)$ + \newfontfamily{$fontfamilies.name$}[$fontfamilies.options$]{$fontfamilies.font$} +$endfor$ +$if(euro)$ + \newcommand{\euro}{€} +$endif$ +$if(mainfont)$ + \setmainfont[$for(mainfontoptions)$$mainfontoptions$$sep$,$endfor$]{$mainfont$} +$endif$ +$if(sansfont)$ + \setsansfont[$for(sansfontoptions)$$sansfontoptions$$sep$,$endfor$]{$sansfont$} +$endif$ +$if(monofont)$ + \setmonofont[Mapping=tex-ansi$if(monofontoptions)$,$for(monofontoptions)$$monofontoptions$$sep$,$endfor$$endif$]{$monofont$} +$endif$ +$if(mathfont)$ +$if(mathspec)$ + \ifxetex + \setmathfont(Digits,Latin,Greek)[$for(mathfontoptions)$$mathfontoptions$$sep$,$endfor$]{$mathfont$} + \else + \setmathfont[$for(mathfontoptions)$$mathfontoptions$$sep$,$endfor$]{$mathfont$} + \fi +$else$ + \setmathfont[$for(mathfontoptions)$$mathfontoptions$$sep$,$endfor$]{$mathfont$} +$endif$ +$endif$ +$if(CJKmainfont)$ + \usepackage{xeCJK} + \setCJKmainfont[$for(CJKoptions)$$CJKoptions$$sep$,$endfor$]{$CJKmainfont$} +$endif$ +\fi +% use upquote if available, for straight quotes in verbatim environments +\IfFileExists{upquote.sty}{\usepackage{upquote}}{} +% use microtype if available +\IfFileExists{microtype.sty}{% +\usepackage[$for(microtypeoptions)$$microtypeoptions$$sep$,$endfor$]{microtype} +\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts +}{} +\PassOptionsToPackage{hyphens}{url} % url is loaded by hyperref +$if(verbatim-in-note)$ +\usepackage{fancyvrb} +$endif$ +% \usepackage[unicode=true]{hyperref} commented because of conflict +$if(colorlinks)$ +\PassOptionsToPackage{usenames,dvipsnames}{color} % color is loaded by hyperref +$endif$ +\hypersetup{ +$if(title-meta)$ + pdftitle={$title-meta$}, +$endif$ +$if(author-meta)$ + pdfauthor={$author-meta$}, +$endif$ +$if(keywords)$ + pdfkeywords={$for(keywords)$$keywords$$sep$, $endfor$}, +$endif$ +$if(colorlinks)$ + colorlinks=true, + linkcolor=$if(linkcolor)$$linkcolor$$else$Maroon$endif$, + citecolor=$if(citecolor)$$citecolor$$else$Blue$endif$, + urlcolor=$if(urlcolor)$$urlcolor$$else$Blue$endif$, +$else$ + pdfborder={0 0 0}, +$endif$ + breaklinks=true} +\urlstyle{same} % don't use monospace font for urls +$if(verbatim-in-note)$ +\VerbatimFootnotes % allows verbatim text in footnotes +$endif$ +$if(geometry)$ +\usepackage[$for(geometry)$$geometry$$sep$,$endfor$]{geometry} +$endif$ +$if(lang)$ +\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex + \usepackage[shorthands=off,$for(babel-otherlangs)$$babel-otherlangs$,$endfor$main=$babel-lang$]{babel} +$if(babel-newcommands)$ + $babel-newcommands$ +$endif$ +\else + \usepackage{polyglossia} + \setmainlanguage[$polyglossia-lang.options$]{$polyglossia-lang.name$} +$for(polyglossia-otherlangs)$ + \setotherlanguage[$polyglossia-otherlangs.options$]{$polyglossia-otherlangs.name$} +$endfor$ +\fi +$endif$ +$if(natbib)$ +\usepackage{natbib} +\bibliographystyle{$if(biblio-style)$$biblio-style$$else$plainnat$endif$} +$endif$ +$if(biblatex)$ +\usepackage[$if(biblio-style)$style=$biblio-style$,$endif$$for(biblatexoptions)$$biblatexoptions$$sep$,$endfor$]{biblatex} +$for(bibliography)$ +\addbibresource{$bibliography$} +$endfor$ +$endif$ +$if(listings)$ +\usepackage{listings} +$endif$ +$if(lhs)$ +\lstnewenvironment{code}{\lstset{language=Haskell,basicstyle=\small\ttfamily}}{} +$endif$ +$if(highlighting-macros)$ +$highlighting-macros$ +$endif$ +$if(tables)$ +\usepackage{longtable,booktabs} +% Fix footnotes in tables (requires footnote package) +\IfFileExists{footnote.sty}{\usepackage{footnote}\makesavenoteenv{long table}}{} +$endif$ +$if(graphics)$ +\usepackage{graphicx,grffile} +\makeatletter +\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi} +\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi} +\makeatother +% Scale images if necessary, so that they will not overflow the page +% margins by default, and it is still possible to overwrite the defaults +% using explicit options in \includegraphics[width, height, ...]{} +\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio} +$endif$ +$if(links-as-notes)$ +% Make links footnotes instead of hotlinks: +\renewcommand{\href}[2]{#2\footnote{\url{#1}}} +$endif$ +$if(strikeout)$ +\usepackage[normalem]{ulem} +% avoid problems with \sout in headers with hyperref: +\pdfstringdefDisableCommands{\renewcommand{\sout}{}} +$endif$ +$if(indent)$ +$else$ +\IfFileExists{parskip.sty}{% +\usepackage{parskip} +}{% else +\setlength{\parindent}{0pt} +\setlength{\parskip}{6pt plus 2pt minus 1pt} +} +$endif$ +\setlength{\emergencystretch}{3em} % prevent overfull lines +\providecommand{\tightlist}{% + \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}} +$if(numbersections)$ +\setcounter{secnumdepth}{$if(secnumdepth)$$secnumdepth$$else$5$endif$} +$else$ +\setcounter{secnumdepth}{0} +$endif$ +$if(subparagraph)$ +$else$ +% Redefines (sub)paragraphs to behave more like sections +\ifx\paragraph\undefined\else +\let\oldparagraph\paragraph +\renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}} +\fi +\ifx\subparagraph\undefined\else +\let\oldsubparagraph\subparagraph +\renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}} +\fi +$endif$ +$if(dir)$ +\ifxetex + % load bidi as late as possible as it modifies e.g. graphicx + $if(latex-dir-rtl)$ + \usepackage[RTLdocument]{bidi} + $else$ + \usepackage{bidi} + $endif$ +\fi +\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex + \TeXXeTstate=1 + \newcommand{\RL}[1]{\beginR #1\endR} + \newcommand{\LR}[1]{\beginL #1\endL} + \newenvironment{RTL}{\beginR}{\endR} + \newenvironment{LTR}{\beginL}{\endL} +\fi +$endif$ + +% set default figure placement to htbp +\makeatletter +\def\fps@figure{htbp} +\makeatother + +$for(header-includes)$ +$header-includes$ +$endfor$ + +$if(title)$ +\title{$title$$if(thanks)$\thanks{$thanks$}$endif$} +$endif$ +$if(subtitle)$ +\providecommand{\subtitle}[1]{} +\subtitle{$subtitle$} +$endif$ +$if(author)$ +\author{$for(author)$$author$$sep$ \and $endfor$} +$endif$ +$if(institute)$ +\providecommand{\institute}[1]{} +\institute{$for(institute)$$institute$$sep$ \and $endfor$} +$endif$ +\date{$date$} + +\begin{document} +$if(title)$ +\maketitle +$endif$ +$if(abstract)$ +\begin{abstract} +$abstract$ +\end{abstract} +$endif$ + +$for(include-before)$ +$include-before$ + +$endfor$ +$if(toc)$ +{ +$if(colorlinks)$ +\hypersetup{linkcolor=$if(toccolor)$$toccolor$$else$black$endif$} +$endif$ +\setcounter{tocdepth}{$toc-depth$} +\tableofcontents +} +$endif$ +$if(lot)$ +\listoftables +$endif$ +$if(lof)$ +\listoffigures +$endif$ +$body$ + +$if(natbib)$ +$if(bibliography)$ +$if(biblio-title)$ +$if(book-class)$ +\renewcommand\bibname{$biblio-title$} +$else$ +\renewcommand\refname{$biblio-title$} +$endif$ +$endif$ +\bibliography{$for(bibliography)$$bibliography$$sep$,$endfor$} + +$endif$ +$endif$ +$if(biblatex)$ +\printbibliography$if(biblio-title)$[title=$biblio-title$]$endif$ + +$endif$ +$for(include-after)$ +$include-after$ + +$endfor$ +\end{document} diff --git a/presentations/pasc/metadata.yaml b/presentations/pasc/metadata.yaml new file mode 100644 index 0000000000000000000000000000000000000000..acc55e66eb58fa27b2a40ea918bb644a28f97635 --- /dev/null +++ b/presentations/pasc/metadata.yaml @@ -0,0 +1,7 @@ +--- +# used for lecture slides and homework sheets +subtitle: "Séminaire de groupe du SPC" +author: "Orestis Malaspinas, ITI, HEPIA" +# lang: fr-CH +... + diff --git a/presentations/pasc/my_highlight.theme b/presentations/pasc/my_highlight.theme new file mode 100644 index 0000000000000000000000000000000000000000..1d80b47b144ca30f252d27a32e3d3775267c7bbb --- /dev/null +++ b/presentations/pasc/my_highlight.theme @@ -0,0 +1,204 @@ +{ + "text-color": null, + "background-color": "#f0f0f0", + "line-number-color": "#aaaaaa", + "line-number-background-color": null, + "text-styles": { + "Other": { + "text-color": "#8f5902", + "background-color": null, + "bold": false, + "italic": false, + "underline": false + }, + "Attribute": { + "text-color": "#c4a000", + "background-color": null, + "bold": false, + "italic": false, + "underline": false + }, + "SpecialString": { + "text-color": "#4e9a06", + "background-color": null, + "bold": false, + "italic": false, + "underline": false + }, + "Annotation": { + "text-color": "#8f5902", + "background-color": null, + "bold": true, + "italic": true, + "underline": false + }, + "Function": { + "text-color": "#000000", + "background-color": null, + "bold": false, + "italic": false, + "underline": false + }, + "String": { + "text-color": "#4e9a06", + "background-color": null, + "bold": false, + "italic": false, + "underline": false + }, + "ControlFlow": { + "text-color": "#204a87", + "background-color": null, + "bold": true, + "italic": false, + "underline": false + }, + "Operator": { + "text-color": "#ce5c00", + "background-color": null, + "bold": true, + "italic": false, + "underline": false + }, + "Error": { + "text-color": "#a40000", + "background-color": null, + "bold": true, + "italic": false, + "underline": false + }, + "BaseN": { + "text-color": "#0000cf", + "background-color": null, + "bold": false, + "italic": false, + "underline": false + }, + "Alert": { + "text-color": "#ef2929", + "background-color": null, + "bold": false, + "italic": false, + "underline": false + }, + "Variable": { + "text-color": "#000000", + "background-color": null, + "bold": false, + "italic": false, + "underline": false + }, + "Extension": { + "text-color": null, + "background-color": null, + "bold": false, + "italic": false, + "underline": false + }, + "Preprocessor": { + "text-color": "#8f5902", + "background-color": null, + "bold": false, + "italic": true, + "underline": false + }, + "Information": { + "text-color": "#8f5902", + "background-color": null, + "bold": true, + "italic": true, + "underline": false + }, + "VerbatimString": { + "text-color": "#4e9a06", + "background-color": null, + "bold": false, + "italic": false, + "underline": false + }, + "Warning": { + "text-color": "#8f5902", + "background-color": null, + "bold": true, + "italic": true, + "underline": false + }, + "Documentation": { + "text-color": "#8f5902", + "background-color": null, + "bold": true, + "italic": true, + "underline": false + }, + "Import": { + "text-color": null, + "background-color": null, + "bold": false, + "italic": false, + "underline": false + }, + "Char": { + "text-color": "#4e9a06", + "background-color": null, + "bold": false, + "italic": false, + "underline": false + }, + "DataType": { + "text-color": "#204a87", + "background-color": null, + "bold": false, + "italic": false, + "underline": false + }, + "Float": { + "text-color": "#0000cf", + "background-color": null, + "bold": false, + "italic": false, + "underline": false + }, + "Comment": { + "text-color": "#8f5902", + "background-color": null, + "bold": false, + "italic": true, + "underline": false + }, + "CommentVar": { + "text-color": "#8f5902", + "background-color": null, + "bold": true, + "italic": true, + "underline": false + }, + "Constant": { + "text-color": "#000000", + "background-color": null, + "bold": false, + "italic": false, + "underline": false + }, + "SpecialChar": { + "text-color": "#000000", + "background-color": null, + "bold": false, + "italic": false, + "underline": false + }, + "DecVal": { + "text-color": "#0000cf", + "background-color": null, + "bold": false, + "italic": false, + "underline": false + }, + "Keyword": { + "text-color": "#204a87", + "background-color": null, + "bold": true, + "italic": false, + "underline": false + } + } +} diff --git a/presentations/pasc/pres.md b/presentations/pasc/pres.md new file mode 100644 index 0000000000000000000000000000000000000000..64aaa6e4cafc9a6665990ebf4bff71bca92f4ed9 --- /dev/null +++ b/presentations/pasc/pres.md @@ -0,0 +1,136 @@ +% Ludwig Boltzmann and the Raiders of the Futhark +% O. Malaspinas +% 15 novembre 2019 + +# What is Futhark + +* Statically typed, data-parallel, purely functional, array language ... +* with limited functionalities (no I/O) ... +* that compiles to C, OpenCL, and Cuda backends ... +* efficiently ... +* without the pain of actually writing GPU code. + +Spoiler: a toy d3q27 recursive regularized model does 1.5 GLUPS (single precision). + +# Why use Futhark? + +<!-- TODO: add opencl example --> + +# How to use Futhark + +* Not intended to replace existing generic-purpose languages. +* But aims at being easily integrated into non Futhark code: + + * Used into python code: `PyOpenCL`, + * Conventional `C` code, + * Several others (`C#`, `Haskell`, `F#`, ..., and soon `Rust`). + +* Futhark produces `C` code so it's accessible from any language through a FFI. + +# An example: the dot product + +## `dotprod.fut` + +``` +entry dotprod (xs: []i32) (ys: []i32): i32 = + reduce (+) 0 (map2 (*) xs ys) +``` + +Futhark is very efficient at parallelizing array operations. + +# An example: the dot product from `C` + +```C +#include <stdio.h> +#include "dotprod.h" +int main() { + int x[] = { 1, 2, 3, 4 }; + int y[] = { 2, 3, 4, 1 }; + // here goes context init + futhark_i32_1d *x_arr = futhark_new_i32_1d(ctx, x, 4); + futhark_i32_1d *y_arr = futhark_new_i32_1d(ctx, y, 4); + + int res; + futhark_entry_dotprod(ctx, &res, x_arr, y_arr); + futhark_context_sync(ctx); + + printf("Result: %d\n", res); + // here goes context destruction and + // memory deallocation +} +``` + +# And now + +Let's implement some lattice Boltzmann + +# Computation of macroscopic moments + +## LBM equation + +\begin{equation} +\rho=\sum_{i=0}^{q-1}f_i,\quad \rho\bm{u}=\sum_{i=0}^{q-1}f_i \bm{c}_i, \forall\ \bm{x}. +\end{equation} + +## Futhark code + +``` +let compute_rho (f: [nx][ny][nz][27]f32): [nx][ny][nz]f32 = + map ( map ( map ( reduce (+) 0.0 ) ) ) f +``` + +<!-- TODO add j --> + +# Computation of the equilibrium distribution + +## LBM equation + +\begin{equation} +f_i^\mathrm{eq}=w_i\rho\left(1+\frac{\bm{c}_i\cdot \bm{u}}{c_s^2}+\frac{1}{2c_s^4}(\bm{c}_i\cdot \bm{u})^2-\frac{1}{2c_s^2}\bm{u}^2\right),\ \forall \bm{x},i +\end{equation} + +## Futhark code + +<!-- TODO add feq --> + +# Collision + +## LBM equation + +\begin{equation} +f^\mathrm{out}_i=f_i\left(1-\frac{1}{\tau}\right)+\frac{1}{\tau}f_i^\mathrm{eq}. +\end{equation} + +## Futhark code + +<!-- TODO add collision --> + +# Propagation + +## LBM equation + +\begin{equation} +f_i(\bm{x}+\bm{c}_i,t+1)=f^\mathrm{out}_i(\bm{x},t). +\end{equation} + +## Futhark code + +<!-- TODO add collision --> + +# And now everything together + +1. Collision (completely local). + * Compute $\rho$, + * Compute $\bm{u}$, + * Compute $f_i^\mathrm{eq}$, + * Compute $f_i^\mathrm{out}$. +2. Propagation. + +## Futhark code + +<!-- TODO add collide and stream --> + + + + +