From 5f89c7f3bca45781a1ed9668d108e5c6b247bea7 Mon Sep 17 00:00:00 2001
From: Orestis <orestis.malaspinas@pm.me>
Date: Mon, 5 Jul 2021 17:00:12 +0200
Subject: [PATCH] updated pres

---
 presentations/pasc/pres.md | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/presentations/pasc/pres.md b/presentations/pasc/pres.md
index 86f79c8..99b1408 100644
--- a/presentations/pasc/pres.md
+++ b/presentations/pasc/pres.md
@@ -321,7 +321,7 @@ f_i^\mathrm{eq}=w_i\rho\left(1+\frac{\bm{c}_i\cdot \bm{u}}{c_s^2}+\frac{1}{2c_s^
 ## Futhark code
 
 ```ocaml
-map_3d(\rho_xyz j_xyz
+map2_3d(\rho_xyz j_xyz
     let u = map(\j_xyzi -> j_xyzi / rho_xyz ) j_xyz
     let u_sqr = dotprod u u
 
@@ -330,7 +330,7 @@ map_3d(\rho_xyz j_xyz
         in rho_xyz * wi *
             (1 + 3 * c_u + 4.5 * c_u * c_u - 1.5 * u_sqr)
     ) w c
-) (zip rho j)
+) rho j
 ```
 
 # Collision
@@ -374,20 +374,29 @@ tabulate_4d nx ny nz q (\x y z ipop ->
 
 # Summary
 
+## What did we achieve here?
+
 * A simple yet complete fluid flow simulator.
 * Lines of readable and "easy" to debug Futhark code: 110.
-* Single precision, periodic, only arrays: 250 MLPUS.
+* Single precision, periodic, only arrays: 350 MLUPS.
+
+## Not bad: but we can do better
+
+. . .
 
-*Not bad: but we can do better.*
+* Actual optimized GPU codes typically reach 2-3 GLUPS on similar GPUs.
+* Can we reach that?
 
 # How can we go faster?
 
+## We can help the compiler a bit
+
 * Arrays are aggressively parallelized: each dimension is flattened.
 * For small dimensions it is usually not worth it.
 * Replace length 3, or length 27 arrays by tuples: better use of GPU 
   architecture or use `INCREMENTAL_FLATTENING`.
 * `[](a, b, c, ..) -> ([]a, []b, []c, ...)`{.ocaml} automatically by the compiler.
-* Result: with a code of 150 lines, we go to 1.5 GLUPS on GPU, 11 MLPUS on a 
+* Result: with a code of 150 lines, we go to 2.3 GLUPS on GPU, 11 MLUPS on a 
   single core, 400 MLUPS on a multi-core machine.
 * Within 10-20\% of state of the art optimized GPU codes. 
 
@@ -426,6 +435,10 @@ tabulate_4d nx ny nz q (\x y z ipop ->
 * M. El Kharroubi,
 * G. Laurie.
 
+## The Futhark compiler team
+
+## The Hasler foundation
+
 # Questions?
 
 ## Futhark webpage: <https://futhark-lang.org/>
-- 
GitLab