From 5a87d2eee737a1ee3519859f91b39f6b9a5bb88d Mon Sep 17 00:00:00 2001
From: "raphael.bach" <raphael.bach@etu.hesge.ch>
Date: Tue, 28 Jun 2022 20:21:10 +0200
Subject: [PATCH] Add support for 1D cellular automata

---
 Makefile                     | 41 +++++++++++++++--
 examples/rule_110/.gitignore |  7 +++
 examples/rule_110/Makefile   | 87 ++++++++++++++++++++++++++++++++++++
 examples/rule_110/main.c     | 51 +++++++++++++++++++++
 examples/rule_110/r110.fut   | 10 +++++
 include/fmpi_core.h          |  2 +-
 include/fmpi_task.h          |  2 +-
 src/fmpi_core.c              |  2 +-
 src/fmpi_task.c              | 62 ++++++++++++++++++++++++-
 9 files changed, 256 insertions(+), 8 deletions(-)
 create mode 100644 examples/rule_110/.gitignore
 create mode 100644 examples/rule_110/Makefile
 create mode 100644 examples/rule_110/main.c
 create mode 100644 examples/rule_110/r110.fut

diff --git a/Makefile b/Makefile
index 43eb1fb..1eba9c4 100644
--- a/Makefile
+++ b/Makefile
@@ -97,14 +97,16 @@ src/futhark/fmpi_entry.h src/futhark/fmpi_entry.c &: src/futhark/fmpi_entry.fut
 ################################################################################
 # BUILD TARGETS - EXAMPLES
 ################################################################################
+.PHONY: examples
+examples: array-sum rule-110
 #-------------------------------------------------------------------------------
 # Array sum
 #-------------------------------------------------------------------------------
 .PHONY: array-sum
 array-sum: export BUILD_MODE := $(BUILD_MODE)
 array-sum: export CFLAGS     := $(filter-out -Iinclude,$(CFLAGS)) -I../../include
-array-sum: export LDFLAGS    := -L../../build/$(BUILD_MODE)
-array-sum: export LDLIBS     := -lfmpi -lm
+array-sum: export LDFLAGS    := $(LDFLAGS) -L../../build/$(BUILD_MODE)
+array-sum: export LDLIBS     := $(LDLIBS) -lfmpi -lm
 array-sum:
 	$(MAKE) -C examples/array_sum all
 
@@ -113,6 +115,22 @@ array-sum-pp: export BUILD_MODE := $(BUILD_MODE)
 array-sum-pp: export CFLAGS     := $(filter-out -Iinclude,$(CFLAGS)) -I../../include
 array-sum-pp:
 	$(MAKE) -C examples/array_sum pp
+#-------------------------------------------------------------------------------
+# Rule 110
+#-------------------------------------------------------------------------------
+.PHONY: rule-110 # Builds the example through its own Makefile, with paths rebased to examples/rule_110.
+rule-110: export BUILD_MODE := $(BUILD_MODE)
+rule-110: export CFLAGS     := $(filter-out -Iinclude,$(CFLAGS)) -I../../include
+rule-110: export LDFLAGS    := $(LDFLAGS) -L../../build/$(BUILD_MODE)
+rule-110: export LDLIBS     := $(LDLIBS) -lfmpi -lm
+rule-110:
+	$(MAKE) -C examples/rule_110
+# Preprocess-only variant: forwards the 'pp' goal to the example Makefile.
+.PHONY: rule-110-pp
+rule-110-pp: export BUILD_MODE := $(BUILD_MODE)
+rule-110-pp: export CFLAGS     := $(filter-out -Iinclude,$(CFLAGS)) -I../../include
+rule-110-pp:
+	$(MAKE) -C examples/rule_110 pp
 ################################################################################
 # INSTALL TARGETS
 ################################################################################
@@ -188,14 +206,17 @@ clean-examples: clean-examples-$(BUILD_MODE)
 .PHONY: clean-examples-all
 clean-examples-all:
 	$(MAKE) -C examples/array_sum clean-all
+	$(MAKE) -C examples/rule_110 clean-all
 
 .PHONY: clean-examples-debug
 clean-examples-debug:
 	$(MAKE) -C examples/array_sum clean-debug
+	$(MAKE) -C examples/rule_110 clean-debug
 
 .PHONY: clean-examples-release
 clean-examples-release:
 	$(MAKE) -C examples/array_sum clean-release
+	$(MAKE) -C examples/rule_110 clean-release
 ################################################################################
 # REBUILD TARGETS
 ################################################################################
@@ -224,6 +245,20 @@ run-as-debug:
 .PHONY: run-as-release
 run-as-release:
 	$(MAKE) -C examples/array_sum run-release
+#-------------------------------------------------------------------------------
+# Rule 110
+#-------------------------------------------------------------------------------
+.PHONY: run-r110 # Runs the example for the current BUILD_MODE via the example Makefile.
+run-r110:
+	$(MAKE) -C examples/rule_110 run-$(BUILD_MODE)
+# Run the debug build of the example.
+.PHONY: run-r110-debug
+run-r110-debug:
+	$(MAKE) -C examples/rule_110 run-debug
+# Run the release build of the example.
+.PHONY: run-r110-release
+run-r110-release:
+	$(MAKE) -C examples/rule_110 run-release
 ################################################################################
 # MISC TARGETS
 ################################################################################
@@ -285,7 +320,7 @@ help:
 	@printf "\n"
 	@printf "RUN:\n"
 	@printf "  run-as                     : Run 'Array sum' example.\n"
-	@printf "  run-gol                    : Run 'Game of Life' example.\n"
+	@printf "  run-r110                   : Run 'Rule 110' example.\n"
 	@printf "\n"
 	@printf "UNINSTALL:\n"
 	@printf "  uninstall                  : Uninstall fmpi static and shared libray, headers and pkg-config file.\n"
diff --git a/examples/rule_110/.gitignore b/examples/rule_110/.gitignore
new file mode 100644
index 0000000..11e91c4
--- /dev/null
+++ b/examples/rule_110/.gitignore
@@ -0,0 +1,7 @@
+*.json
+*.o
+*.pp
+r110-debug
+r110-release
+r110.c
+r110.h
diff --git a/examples/rule_110/Makefile b/examples/rule_110/Makefile
new file mode 100644
index 0000000..df605cb
--- /dev/null
+++ b/examples/rule_110/Makefile
@@ -0,0 +1,87 @@
+################################################################################
+# BUILD TARGETS - MAIN
+################################################################################
+NAME := r110
+
+.PHONY: all
+all: $(NAME)-$(BUILD_MODE)
+
+.PHONY: all-debug
+all-debug:
+	$(MAKE) all BUILD_MODE=debug
+# Both wrappers re-invoke make so that BUILD_MODE is overridden on the command line.
+.PHONY: all-release
+all-release:
+	$(MAKE) all BUILD_MODE=release
+
+$(NAME)-$(BUILD_MODE): main-$(BUILD_MODE).o $(NAME)-$(BUILD_MODE).o # Link the example executable.
+	$(CC) $^ -o $@ $(CFLAGS) $(LDFLAGS) $(LDLIBS)
+# Compile the example's entry point; it depends on the generated header $(NAME).h.
+main-$(BUILD_MODE).o: main.c $(NAME).h
+	$(CC) $< -o $@ -c -fPIC $(CFLAGS)
+# Compile the generated C source using CFLAGS_FUTHARK rather than CFLAGS.
+$(NAME)-$(BUILD_MODE).o: $(NAME).c $(NAME).h
+	$(CC) $< -o $@ -c -fPIC $(CFLAGS_FUTHARK)
+# Grouped target (&:): one futhark invocation produces both the header and the C source.
+$(NAME).h $(NAME).c &: $(NAME).fut
+	futhark c --library $<
+# Convenience alias for the preprocessed main.c.
+.PHONY: pp
+pp: main.pp
+# Stop after preprocessing (-E) and write the result to main.pp.
+main.pp: main.c
+	$(CC) $< -o $@ $(CFLAGS) -dU -E
+################################################################################
+# CLEAN TARGETS
+################################################################################
+# Remove the build products of the currently selected BUILD_MODE.
+.PHONY: clean
+clean: clean-$(BUILD_MODE)
+
+# Remove both build flavours plus the generated Futhark files.
+.PHONY: clean-all
+clean-all: clean-debug clean-release clean-futhark
+
+# Debug executable and its two object files.
+.PHONY: clean-debug
+clean-debug:
+	rm -f $(NAME)-debug $(NAME)-debug.o main-debug.o
+
+# Release executable and its two object files.
+.PHONY: clean-release
+clean-release:
+	rm -f $(NAME)-release $(NAME)-release.o main-release.o
+
+# Generated header and C source, plus $(NAME).json.
+# NOTE(review): the .json is presumably written by futhark as well - confirm.
+.PHONY: clean-futhark
+clean-futhark:
+	rm -f $(NAME).h $(NAME).c $(NAME).json
+################################################################################
+# REBUILD TARGETS
+################################################################################
+.PHONY: rebuild
+rebuild: clean ; $(MAKE) all
+# The build runs in the recipe so it starts only after the clean prerequisite has finished, even under -j.
+.PHONY: rebuild-debug
+rebuild-debug: clean-debug ; $(MAKE) all BUILD_MODE=debug
+# Same pattern as rebuild-debug, with BUILD_MODE forced to release.
+.PHONY: rebuild-release
+rebuild-release: clean-release ; $(MAKE) all BUILD_MODE=release
+################################################################################
+# RUN TARGETS
+################################################################################
+.PHONY: run
+run: run-$(BUILD_MODE)
+# Append the library directory; no leading ':' (an empty entry, i.e. the current directory) when LD_LIBRARY_PATH is unset.
+.PHONY: run-debug
+run-debug: export LD_LIBRARY_PATH:=$(if $(LD_LIBRARY_PATH),$(LD_LIBRARY_PATH):)../../build/debug
+run-debug:
+	mpirun $(NAME)-debug
+# Same as run-debug, against the release build directory.
+.PHONY: run-release
+run-release: export LD_LIBRARY_PATH:=$(if $(LD_LIBRARY_PATH),$(LD_LIBRARY_PATH):)../../build/release
+run-release:
+	mpirun $(NAME)-release
+################################################################################
+.DELETE_ON_ERROR:
diff --git a/examples/rule_110/main.c b/examples/rule_110/main.c
new file mode 100644
index 0000000..05469ec
--- /dev/null
+++ b/examples/rule_110/main.c
@@ -0,0 +1,51 @@
+// C Standard Library
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+// fmpi
+#include <fmpi.h>
+// Internal
+#include "r110.h"
+
+#define T u8
+#define STEP_CNT 2
+
+FMPI_TASK_FUTHARK(rule_110, 1)
+
+int main(int argc, char * argv[]) // Runs STEP_CNT rule_110 steps on a fixed row; the root rank prints `out`.
+{
+    struct fmpi_ctx * ctx = fmpi_init(&argc, &argv);
+    if(ctx == NULL) {
+        fprintf(stderr, "fmpi_init() failed!\n");
+        return EXIT_FAILURE;
+    }
+    T in[] = {
+        1,1,0,1,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1
+    };
+    #define in_size (sizeof(in)/sizeof(T))
+    T out[in_size];
+    struct fmpi_task rule_110_task = FMPI_REGISTER_SYNC_TASK(
+        ctx, rule_110, fmpi_stencil_1d(1),
+        fmpi_data_1d_out(ctx, out, in_size),
+        fmpi_data_1d_in(ctx, in, in_size)
+    );
+    for(size_t i = 0; i < STEP_CNT; i++) {
+        const int err = fmpi_run_task(ctx, &rule_110_task);
+        if(err != FMPI_SUCCESS) { // Report on stderr, like the fmpi_init() failure above; the loop keeps going.
+            fprintf(stderr, "Error rank=%d: fmpi_run_task()\n", fmpi_world_rank(ctx));
+        }
+    }
+    const int err = fmpi_task_finalize(ctx, &rule_110_task, FMPI_TASK_OP_GATHER);
+    if(err != FMPI_SUCCESS) {
+        fprintf(stderr, "Error rank=%d: fmpi_task_finalize()\n", fmpi_world_rank(ctx));
+    }
+    if(fmpi_is_root(ctx)) { // Only the root rank prints the row, one digit per cell.
+        for(size_t i = 0; i < in_size; i++) {
+            printf("%hhd", out[i]);
+        }
+        printf("\n");
+    }
+    fmpi_exit(&ctx);
+    return EXIT_SUCCESS;
+}
diff --git a/examples/rule_110/r110.fut b/examples/rule_110/r110.fut
new file mode 100644
index 0000000..7ca5de8
--- /dev/null
+++ b/examples/rule_110/r110.fut
@@ -0,0 +1,10 @@
+entry rule_110 [n] (xs: [n]u8): []u8 = -- One Rule 110 step; returns the n-2 interior cells of xs.
+    let res = tabulate (n) (\i ->
+        if(i > 0 && i < (n-1)) then -- interior cell: both neighbours exist
+            let p = bool.u8 xs[i-1] -- left neighbour
+            let q = bool.u8 xs[i] -- the cell itself
+            let r = bool.u8 xs[i+1] -- right neighbour
+            in u8.bool (((q && (!p))) || (q != r))
+        else xs[i] -- first/last element copied as-is (sliced away below)
+    )
+    in res[1:n-1] -- drop the first and last elements
diff --git a/include/fmpi_core.h b/include/fmpi_core.h
index 10664dd..9654644 100644
--- a/include/fmpi_core.h
+++ b/include/fmpi_core.h
@@ -129,7 +129,7 @@ int fmpi_world_barrier(const struct fmpi_ctx * ctx);
 /*------------------------------------------------------------------------------
     fmpi_run_task()
 ------------------------------------------------------------------------------*/
-int fmpi_run_task(const struct fmpi_ctx * ctx, const struct fmpi_task * task);
+int fmpi_run_task(const struct fmpi_ctx * ctx, struct fmpi_task * task);
 /*------------------------------------------------------------------------------
     fmpi_sync()
 ------------------------------------------------------------------------------*/
diff --git a/include/fmpi_task.h b/include/fmpi_task.h
index 9967f7d..c794826 100644
--- a/include/fmpi_task.h
+++ b/include/fmpi_task.h
@@ -162,7 +162,7 @@ struct fmpi_task fmpi_task_register(
  *  TODO
  * }
  */
-int fmpi_task_run_sync(const struct fmpi_ctx * ctx, const struct fmpi_task * task);
+int fmpi_task_run_sync(const struct fmpi_ctx * ctx, struct fmpi_task * task);
 /*------------------------------------------------------------------------------
     fmpi_task_run_async()
 ------------------------------------------------------------------------------*/
diff --git a/src/fmpi_core.c b/src/fmpi_core.c
index cb9065c..8989b49 100644
--- a/src/fmpi_core.c
+++ b/src/fmpi_core.c
@@ -105,7 +105,7 @@ int fmpi_world_barrier(const struct fmpi_ctx * const ctx)
     fmpi_run_task()
 ------------------------------------------------------------------------------*/
 int fmpi_run_task(
-    const struct fmpi_ctx * const ctx, const struct fmpi_task * const task
+    const struct fmpi_ctx * const ctx, struct fmpi_task * const task
 ){
     assert(ctx != NULL);
     assert(task != NULL);
diff --git a/src/fmpi_task.c b/src/fmpi_task.c
index 2845ae3..59c29c7 100644
--- a/src/fmpi_task.c
+++ b/src/fmpi_task.c
@@ -28,6 +28,7 @@
 #include <stdbool.h>
 #include <stddef.h> // NULL, size_t
 #include <stdio.h>
+#include <string.h> // memcpy()
 // Internal
 #include "fmpi_domain.h"
 #include "fmpi_stencil.h"
@@ -97,7 +98,7 @@ struct fmpi_task fmpi_task_register(
     fmpi_task_run_sync()
 ------------------------------------------------------------------------------*/
 int fmpi_task_run_sync(
-    const struct fmpi_ctx * const ctx, const struct fmpi_task * const task
+    const struct fmpi_ctx * const ctx, struct fmpi_task * const task
 ){
     assert(ctx != NULL);
     assert(task != NULL);
@@ -119,11 +120,68 @@ int fmpi_task_run_sync(
             ctx->fut, task->args.out.gpu, task->args.out.type.base,
             task->args.out.dim_cnt
         );
-        if(err != 0) {
+        if(err != FMPI_SUCCESS) {
+            FMPI_RAISE_ERROR(ctx->err_handler, "FMPI",
+                "fmpi_futhark_free_data_sync() failed!"
+            );
+        }
+    }
+    if(task->stencil.type != FMPI_STENCIL_NONE) { // Copy output into the halo buffer, swap borders, re-upload input.
+        const int rank = ctx->mpi->rank;
+        size_t type_size = task->domains[0].halo.type.size;
+        void * const halo = task->domains[0].halo.raw;
+        const void * const inner = task->args.out.raw;
+        void * const dest = (rank == 0) ? ((char *)halo + type_size) : halo;
+        const void * const src = (rank != 0) ? ((const char *)inner - type_size) : inner;
+        const size_t inner_size = task->domains[0].inner.size;
+        const size_t size = (rank != (ctx->mpi->size - 1)) ? (inner_size + type_size) : inner_size;
+        memcpy(dest, src, size); // NOTE(review): src/size reach one element outside `inner` on some ranks - confirm layout.
+        // Byte offsets of elements 1, n and n+1 of the halo buffer (n = halo.cnt; NOTE(review): confirm n+1 is in bounds).
+        const size_t idx_1 = type_size;
+        const size_t idx_n = task->domains[0].halo.cnt * type_size;
+        const size_t idx_np1 = idx_n + idx_1;
+        // buf_0 and buf_np1 receive the neighbours' border elements; buf_1 and buf_n are the ones sent out.
+              void * const buf_0   = task->domains[0].halo.raw;
+        const void * const buf_1   = (char *)buf_0 + idx_1;
+        const void * const buf_n   = (char *)buf_0 + idx_n;
+              void * const buf_np1 = (char *)buf_0 + idx_np1;
+        // Even ranks send first and odd ranks receive first, so every blocking call has a matching peer call.
+        MPI_Datatype type = fmpi_mpi_type(task->domains[0].halo.type.base);
+        if((rank % 2) == 0) {
+            int left = (rank != 0) ? (rank - 1) : MPI_PROC_NULL;
+            int right = (rank != (ctx->mpi->size-1)) ? (rank + 1) : MPI_PROC_NULL; // no right peer on the last rank
+            MPI_Send(buf_n  , 1, type, right, 0, ctx->mpi->world);
+            MPI_Recv(buf_0  , 1, type, left,  1, ctx->mpi->world, MPI_STATUS_IGNORE);
+            MPI_Send(buf_1  , 1, type, left,  0, ctx->mpi->world);
+            MPI_Recv(buf_np1, 1, type, right, 1, ctx->mpi->world, MPI_STATUS_IGNORE);
+        // Odd ranks mirror the sequence above with the tags swapped.
+        } else {
+            int left = rank - 1;
+            int right = (rank != (ctx->mpi->size-1)) ? (rank + 1) : MPI_PROC_NULL;
+            MPI_Recv(buf_0  , 1, type, left,  0, ctx->mpi->world, MPI_STATUS_IGNORE);
+            MPI_Send(buf_n  , 1, type, right, 1, ctx->mpi->world);
+            MPI_Recv(buf_np1, 1, type, right, 0, ctx->mpi->world, MPI_STATUS_IGNORE);
+            MPI_Send(buf_1  , 1, type, left,  1, ctx->mpi->world);
+        }
+        const int err = fmpi_futhark_free_data_sync(
+            ctx->fut, task->args.in[0].gpu, task->domains[0].halo.type.base,
+            task->domains[0].halo.dim_cnt
+        );
+        if(err != FMPI_SUCCESS) {
             FMPI_RAISE_ERROR(ctx->err_handler, "FMPI",
                 "fmpi_futhark_free_data_sync() failed!"
             );
         }
+        task->args.in[0].gpu = fmpi_futhark_new_data_sync(
+            ctx->fut, task->domains[0].halo.raw, task->domains[0].halo.type.base,
+            task->domains[0].halo.dim_cnt, task->domains[0].halo.dim_len[0],
+            task->domains[0].halo.dim_len[1], task->domains[0].halo.dim_len[2]
+        );
+        if(task->args.in[0].gpu == NULL) {
+            FMPI_RAISE_ERROR(ctx->err_handler, "FMPI",
+                "fmpi_futhark_new_data_sync() failed!"
+            );
+        }
     }
     return err_id;
 }
-- 
GitLab