diff --git a/presentations/pasc/code/liblbm.fut b/presentations/pasc/code/liblbm.fut index ea3c58b752e76969ab8870c815887ceea8586254..272a2c54b9f4460e211e3d4ca4604670d1b119d2 100644 --- a/presentations/pasc/code/liblbm.fut +++ b/presentations/pasc/code/liblbm.fut @@ -8,6 +8,24 @@ let tabulate_4d 'a (p: i64) (n: i64) (m: i64) (o: i64) (f: i64 -> i64 -> i64 -> let dotprod (xs: []f32) (ys: []f32): f32 = reduce (+) 0 (map (\(x, y) -> x * y) (zip xs ys)) +let map3d 'a [nx][ny][nz]'b (f: a -> b) (xs: [nx][ny][nz]a): [nx][ny][nz]b = + map(\xs1 -> + map(\xs2 -> + map(\xs3 -> + f xs3 + ) xs2 + ) xs1 + ) xs + +let map2_3d 'a 'c [nx][ny][nz]'b (f: a -> c -> b) (xs: [nx][ny][nz]a) (ys: [nx][ny][nz]c): [nx][ny][nz]b = + map2(\xs1 ys1 -> + map2(\xs2 ys2 -> + map2(\xs3 ys3 -> + f xs3 ys3 + ) xs2 ys2 + ) xs1 ys1 + ) xs ys + ------------------------------------ ------------ CONSTANTS ------------- ------------------------------------ @@ -41,49 +59,33 @@ module d3q27 = { } let compute_rho [nx][ny][nz][q] (f: [nx][ny][nz][q]f32): [nx][ny][nz]f32 = - map(\fx -> - map(\fxy -> - map(\fxyz -> - reduce (+) 0 fxyz - ) fxy - ) fx + map3d(\fxyz -> + reduce (+) 0 fxyz ) f let compute_j [nx][ny][nz][q][d] (f: [nx][ny][nz][q]f32)(c: [q][d]f32): [nx][ny][nz][d]f32 = - map(\fx -> - map(\fxy -> - map(\fxyz -> - map(\ci -> - dotprod ci fxyz - ) (transpose c) -- intrinsic - ) fxy - ) fx + map3d(\fxyz -> + map(\ci -> + dotprod ci fxyz + ) (transpose c) -- intrinsic ) f let compute_feq [nx][ny][nz][q][d] (w: [q]f32) (c: [q][d]f32)(rho: [nx][ny][nz]f32)(j: [nx][ny][nz][d]f32): [nx][ny][nz][q]f32 = - map2(\rho_x j_x -> - map2(\rho_xy j_xy -> - map2(\rho_xyz j_xyz -> - let u = map(\ji -> ji / rho_xyz) j_xyz - let u_sqr = dotprod u u - - in map2(\wi ci -> - let c_u = dotprod ci u - in wi * rho_xyz* (1 + 3*c_u + 4.5 * c_u * c_u - 1.5 * u_sqr) - ) w c - ) rho_xy j_xy - )rho_x j_x + map2_3d(\rho_xyz j_xyz -> + let u = map(\ji -> ji / rho_xyz) j_xyz + let u_sqr = dotprod u u + + in map2(\wi ci -> + let c_u = dotprod ci u + in wi * rho_xyz* (1 + 3*c_u + 4.5 * c_u * c_u - 1.5 * u_sqr) + ) w c ) rho j let collision [nx][ny][nz][q] (f: [nx][ny][nz][q]f32)(feq: [nx][ny][nz][q]f32) (omega: f32): [nx][ny][nz][q]f32 = - map2 (\fx feqx -> - map2(\fxy feqxy -> - map2(\fxyz feqxyz -> - map2(\fxyzi feqxyzi -> - (1-omega) * fxyzi + omega * feqxyzi - ) fxyz feqxyz - ) fxy feqxy - ) fx feqx + map2_3d (\f_xyz feq_xyz -> + map2(\f_i feq_i -> + (1-omega) * f_i + omega * feq_i + ) f_xyz feq_xyz ) f feq let collide [nx][ny][nz][q][d] (f: [nx][ny][nz][q]f32) (omega: f32) (w: [q]f32) (c: [q][d]f32): [nx][ny][nz][q]f32 = diff --git a/presentations/pasc/metadata.yaml b/presentations/pasc/metadata.yaml index da49329560aa1fc5e0933c3a4ea4a02f230dd9c4..24bccb891dfacddca0b90ca3be663a6bfc5ded70 100644 --- a/presentations/pasc/metadata.yaml +++ b/presentations/pasc/metadata.yaml @@ -1,7 +1,7 @@ --- # used for lecture slides and homework sheets subtitle: "PASC workshop" -author: "Orestis Malaspinas, HEPIA" +author: "Orestis Malaspinas and Michaƫl El Kharroubi, HEPIA" # lang: fr-CH ... diff --git a/presentations/pasc/pres.md b/presentations/pasc/pres.md index f962ccb50a8b22a30b36546a7955f0a3d8945a82..86f79c819d411104180d2ce8fa73ee1484d06756 100644 --- a/presentations/pasc/pres.md +++ b/presentations/pasc/pres.md @@ -14,7 +14,7 @@ - Statically typed, data-parallel, purely functional, array language, - with limited functionalities (no I/O for example), -- that compiles to sequencial, multi-core, OpenCL, and Cuda backends, +- that compiles to sequential, multi-core, OpenCL, and Cuda backends, - very efficiently, - without the pain of actually writing sequential, multi-code, or GPU code. @@ -132,8 +132,6 @@ These operations are very efficiently parallelized. . . . -::: cdotprod - ## `dotprod.c` ```C @@ -145,7 +143,6 @@ int dot_prod(int xs[], int ys[], int size) { return dot; } ``` -::: # An example: the dot product from `C` @@ -177,7 +174,7 @@ int main() { ## The algorithm -* Cellular automaton-like algorithm for fluid flow simulation. +* *Cellular automaton*-like algorithm for fluid flow simulation. * Cartesian grid with $q$ variables per grid point, $f_i(\bm{x}, t)$. * Each time step is made of: 1. Collision (local operations only). @@ -197,8 +194,51 @@ Repeat 1-2 a certain amount of times 3. Get the data and process it. +# The LBM-CPU pseudo-code (1/2) + +```C +void collision(float f[nx][ny][nz][q]) { + for (int x = 0; x < nx; ++x) { + for (int y = 0; y < ny; ++y) { + for (int z = 0; z < nz; ++z) { + float rho, j[3]; + compute_rho_j(f[x][y][z], &rho, j); + for (int i = 0; i < q; ++i) { + float feq = compute_feq(rho, j); + f[x][y][z][i] *= (1-omega) * f[x][y][z][i]; + f[x][y][z][i] += omega * feq; + } + } + } + } +``` + +# The LBM-CPU pseudo-code (2/2) + +```C +void propagation(float f[nx][ny][nz][q], + float fout[nx][ny][nz][q]) +{ + for (int x = 0; x < nx; ++x) { + for (int y = 0; y < ny; ++y) { + for (int z = 0; z < nz; ++z) { + for (int i = 0; i < q; ++i) { + int next_x = (x-(i32.f32 c[ipop,0]) + nx) % nx; + int next_y = (y-(i32.f32 c[ipop,1]) + ny) % ny; + int next_z = (z-(i32.f32 c[ipop,2]) + nz) % nz; + + fout[x][y][z][i] = f[x][y][z][i]; + } + } + } + } +} +``` + # The LBM pseudo-code +## One time-step + ```ocaml let time_step [nx][ny][nz][q] (f: [nx][ny][nz][q]f32) -> [nx][ny][nz][q]f32 = @@ -217,7 +257,7 @@ let time_step [nx][ny][nz][q] (f: [nx][ny][nz][q]f32) f: [nx][ny][nz][q]f32 -- 4d array feq: [nx][ny][nz][q]f32 -- 4d array rho: [nx][ny][nz]f32 -- 3d array -j: [nx][ny][nz][d]f32 -- 4d array +j: [nx][ny][nz][d]f32 -- 4d array ``` ## Functions @@ -236,8 +276,6 @@ let map3d 'a [nx][ny][nz]'b ## LBM equations -On **each** grid point: - \begin{equation} \rho=\sum_{i=0}^{q-1}f_i, \forall\ \bm{x}. \end{equation} @@ -268,7 +306,7 @@ let compute_j [nx][ny][nz][q] map3d (\fxyz -> map(\ci -> dotprod ci fxyz - ) (transpose c) -- intrinsic + ) c ) f ``` diff --git a/presentations/unige2020/code/Makefile b/presentations/unige2020/code/Makefile index cc6e33264955f87c888fefc6cc0938a5b46a51a5..87f2a5b997dd94eb03a53aecea248b8e60b7a1c5 100644 --- a/presentations/unige2020/code/Makefile +++ b/presentations/unige2020/code/Makefile @@ -1,7 +1,9 @@ -TARGET=c +TARGET=multicore ifeq ($(TARGET),cuda) CUDALIBS=-lcudart -lcuda -lnvrtc +else ifeq ($(TARGET), multicore) + MULTICORELIBS=-lpthread else ifeq ($(TARGET), opencl) OPENCLIBS=-lOpenCL endif @@ -9,9 +11,9 @@ endif CC=gcc FC=futhark OPTS=-Wall -Wextra -g -std=gnu11 -SANITIZERS=-fsanitize=address +# SANITIZERS=-fsanitize=address OPTIM=-O3 -LIBS=-lm $(SDLLIBS) $(CUDALIBS) $(OPENCLIBS) $(SANITIZERS) +LIBS=-lm $(SDLLIBS) $(CUDALIBS) $(OPENCLIBS) $(SANITIZERS) $(MULTICORELIBS) all: lbm3d