From 73eede77cd493e5973bd48a6cf24b3293fa24f1d Mon Sep 17 00:00:00 2001
From: "raphael.bach" <raphael.bach@etu.hesge.ch>
Date: Sun, 26 Jun 2022 00:35:59 +0200
Subject: [PATCH] Add `struct fmpi_partition`

---
 include/fmpi_domain.h | 13 +++++-
 src/fmpi_domain.c     | 96 ++++++++++++++++++++++++++++++++-----------
 src/fmpi_task.c       | 26 ++++++------
 3 files changed, 96 insertions(+), 39 deletions(-)

diff --git a/include/fmpi_domain.h b/include/fmpi_domain.h
index 0b640d2..ddecd26 100644
--- a/include/fmpi_domain.h
+++ b/include/fmpi_domain.h
@@ -30,11 +30,19 @@
 #include <stddef.h> // size_t
 // Internal
 #include "fmpi_data.h"
+#include "fmpi_stencil.h"
 /*==============================================================================
     TYPE
 ==============================================================================*/
 // Forward declaration from `fmpi_ctx.h`
 struct fmpi_ctx;
+/*------------------------------------------------------------------------------
+    fmpi_partition
+------------------------------------------------------------------------------*/
+typedef struct fmpi_partition {
+    struct fmpi_data inner; //!< Cells owned by the partition; `raw` points into the domain data.
+    struct fmpi_data halo; //!< Separate buffer: `inner` plus one cell per side; only built when a stencil is used.
+} fmpi_partition;
 /*------------------------------------------------------------------------------
     fmpi_domain
 ------------------------------------------------------------------------------*/
@@ -43,7 +51,7 @@ struct fmpi_ctx;
  */
 typedef struct fmpi_domain {
     const struct fmpi_data * data; //!< Data composing the domain.
-    struct fmpi_data * parts; //!< Partitions of the domain after decomposition.
+    struct fmpi_partition * parts; //!< Partitions after decomposition (array of `part_cnt` entries).
     size_t part_cnt;          //!< Number of partitions.
 } fmpi_domain;
 /*==============================================================================
@@ -53,7 +61,8 @@ typedef struct fmpi_domain {
     fmpi_new_domain()
 ------------------------------------------------------------------------------*/
 struct fmpi_domain fmpi_new_domain(
-    const struct fmpi_ctx * ctx, const struct fmpi_data * data
+    const struct fmpi_ctx * ctx, const struct fmpi_data * data,
+    struct fmpi_stencil stencil
 );
 /*==============================================================================
     GUARD
diff --git a/src/fmpi_domain.c b/src/fmpi_domain.c
index 63a8c21..98b4e24 100644
--- a/src/fmpi_domain.c
+++ b/src/fmpi_domain.c
@@ -29,6 +29,7 @@
 #include <string.h> // memcpy()
 // Internal
 #include "fmpi_data.h"
+#include "fmpi_stencil.h"
 #include "internal/fmpi_ctx.h"
 #include "internal/fmpi_error.h"
 #include "internal/fmpi_mpi.h"
@@ -41,8 +42,9 @@
 /**
  * TODO
  */
-static struct fmpi_data * fmpi_partition_block_1d(
-    const struct fmpi_ctx * ctx, const struct fmpi_data * data
+static struct fmpi_partition * fmpi_partition_block_1d(
+    const struct fmpi_ctx * ctx, const struct fmpi_data * data, size_t proc_cnt,
+    const struct fmpi_stencil stencil
 );
 /*==============================================================================
     PUBLIC FUNCTION DEFINITION
@@ -52,8 +54,9 @@ static struct fmpi_data * fmpi_partition_block_1d(
     fmpi_new_domain()
 ------------------------------------------------------------------------------*/
 struct fmpi_domain fmpi_new_domain(
-    const struct fmpi_ctx * const ctx, const struct fmpi_data * const data)
-{
+    const struct fmpi_ctx * const ctx, const struct fmpi_data * const data,
+    const struct fmpi_stencil stencil
+){
     assert(ctx != NULL);
     const size_t proc_cnt = (size_t)ctx->mpi->size;
     struct fmpi_domain domain = {
@@ -61,7 +64,7 @@ struct fmpi_domain fmpi_new_domain(
         .parts = NULL,
         .part_cnt = proc_cnt
     };
-    domain.parts = fmpi_partition_block_1d(ctx, data);
+    domain.parts = fmpi_partition_block_1d(ctx, data, proc_cnt, stencil);
     if(domain.parts == NULL) {
         FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "fmpi_partition_block_1d() failed!");
     }
@@ -73,33 +76,80 @@ struct fmpi_domain fmpi_new_domain(
 /*------------------------------------------------------------------------------
     fmpi_partition_block_1d()
 ------------------------------------------------------------------------------*/
-static struct fmpi_data * fmpi_partition_block_1d(
-    const struct fmpi_ctx * const ctx, const struct fmpi_data * const data
+// Splits `data` into `proc_cnt` contiguous 1D blocks. Only the calling rank's
+// entry is filled in (inner view + optional halo copy); the rest stay zeroed.
+static struct fmpi_partition * fmpi_partition_block_1d(
+    const struct fmpi_ctx * const ctx, const struct fmpi_data * const data,
+    const size_t proc_cnt, const struct fmpi_stencil stencil
 ) {
     assert(ctx != NULL);
     assert(data != NULL);
-    const size_t proc_cnt = (size_t)ctx->mpi->size;
-    struct fmpi_data * parts = malloc(proc_cnt * sizeof(*parts));
+    struct fmpi_partition * parts = malloc(proc_cnt * sizeof(*parts));
     if(parts == NULL) {
         FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "malloc(parts) failed!");
         return NULL;
     }
-    const size_t cnt_per_proc = data->cnt/proc_cnt;
-    size_t rem = data->cnt % proc_cnt;
+    memset(parts, 0, proc_cnt * sizeof(*parts)); // unset `raw` pointers must be NULL
+    const size_t type_size = data->type.size;
+    const size_t data_cnt = data->cnt;
+    //! @todo Handle proc_cnt > data_cnt
+    const size_t cnt_per_proc = data_cnt/proc_cnt;
+    size_t rem = data_cnt % proc_cnt;
     size_t offset = 0;
+    const size_t rank = (size_t)ctx->mpi->rank;
     for(size_t i = 0; i < proc_cnt; i++) {
-        const size_t cnt = rem != 0 ? cnt_per_proc + 1 : cnt_per_proc;
-        rem = rem != 0 ? rem - 1 : rem;
-        const size_t size = cnt * data->type.size;
-        parts[i] = (struct fmpi_data){ \
-            .type = data->type,
-            .cnt = cnt,
-            .size = size,
-            .dim_len = {cnt, 1, 1},
-            .dim_cnt = 1,
-            .raw = (char *)data->raw + offset
-        };
-        offset += size;
+        const size_t inner_cnt = (rem != 0) ? (cnt_per_proc + 1) : cnt_per_proc;
+        const size_t inner_size = inner_cnt * type_size;
+        if(i == rank) {
+            // Inner: a view into `data`, no copy is made
+            struct fmpi_data inner = {
+                .type = data->type,
+                .cnt = inner_cnt,
+                .size = inner_size,
+                .dim_len = {inner_cnt, 1, 1},
+                .dim_cnt = 1,
+                .raw = (char *)data->raw + offset,
+                .gpu = NULL
+            };
+            parts[i].inner = inner;
+            // Halo: zeroed private buffer with one extra cell on each side
+            if(stencil.type != FMPI_STENCIL_NONE) {
+                const size_t halo_cnt = inner_cnt + 2;
+                const size_t halo_size = halo_cnt * type_size;
+                struct fmpi_data halo = {
+                    .type = data->type,
+                    .cnt = halo_cnt,
+                    .size = halo_size,
+                    .dim_len = {halo_cnt, 1, 1},
+                    .dim_cnt = 1,
+                    .raw = calloc(halo_cnt, type_size),
+                    .gpu = NULL
+                };
+                if(halo.raw == NULL) {
+                    FMPI_RAISE_ERROR(ctx->err_handler, "FMPI",
+                        "calloc(halo.raw) failed!"
+                    );
+                    goto error;
+                }
+                // Copy inner plus the neighbour cells that exist; halo cells outside
+                // the domain (also when proc_cnt == 1) keep the zero from calloc().
+                const size_t left = (i != 0) ? 1 : 0;
+                const size_t right = (i != proc_cnt-1) ? 1 : 0;
+                memcpy(
+                    (char*)halo.raw + (1-left)*type_size,
+                    (const char*)inner.raw - left*type_size,
+                    (left + inner_cnt + right)*type_size);
+                parts[i].halo = halo;
+            }
+        }
+        offset += inner_size;
+        rem = (rem != 0) ? (rem - 1) : rem;
     }
     return parts;
+error:
+    for(size_t i = 0; i < proc_cnt; i++) {
+        free(parts[i].halo.raw);
+    }
+    free(parts);
+    return NULL;
 }
diff --git a/src/fmpi_task.c b/src/fmpi_task.c
index 11477f8..0a71d3c 100644
--- a/src/fmpi_task.c
+++ b/src/fmpi_task.c
@@ -60,25 +60,24 @@ struct fmpi_task fmpi_task_register_sync(
     };
     const size_t rank = (size_t)ctx->mpi->rank;
     for(size_t i = 0; i < task.args.cnt; i++) {
-        task.domains[i] = fmpi_new_domain(ctx, args->in[i]);
+        task.domains[i] = fmpi_new_domain(ctx, &args->in[i], stencil);
         if(task.domains[i].parts == NULL) {
             FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "fmpi_new_domain() failed!");
             continue;
         }
+        const struct fmpi_data * const data = (stencil.type != FMPI_STENCIL_NONE)
+            ? &task.domains[i].parts[rank].halo
+            : &task.domains[i].parts[rank].inner;
         //! @todo Could fmpi_futhark_new_data_async() be called here instead?
         void * gpu_data = fmpi_futhark_new_data_sync(
-            ctx->fut, task.domains[i].parts[rank].raw, task.domains[i].data.type.base,
-            task.domains[i].data.dim_cnt,
-            task.domains[i].parts[rank].dim_len[0],
-            task.domains[i].parts[rank].dim_len[1],
-            task.domains[i].parts[rank].dim_len[2]
+            ctx->fut, data->raw, data->type.base, data->dim_cnt,
+            data->dim_len[0], data->dim_len[1], data->dim_len[2]
         );
         if(gpu_data == NULL) {
             FMPI_RAISE_ERROR(ctx->err_handler, "FMPI",
                 "fmpi_futhark_new_data_sync() failed!"
             );
         }
-        task.domains[i].parts[rank].gpu = gpu_data;
         task.args.in[i].gpu = gpu_data;
     }
     ctx->tasks[ctx->task_cnt++] = task;
@@ -106,24 +105,23 @@ struct fmpi_task fmpi_task_register_async(
     };
     const size_t rank = (size_t)ctx->mpi->rank;
     for(size_t i = 0; i < task.args.cnt; i++) {
-        task.domains[i] = fmpi_new_domain(ctx, args->in[i]);
+        task.domains[i] = fmpi_new_domain(ctx, &args->in[i], stencil);
         if(task.domains[i].parts == NULL) {
             FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "fmpi_new_domain() failed!");
             continue;
         }
+        const struct fmpi_data * const data = (stencil.type != FMPI_STENCIL_NONE)
+            ? &task.domains[i].parts[rank].halo
+            : &task.domains[i].parts[rank].inner;
         void * gpu_data = fmpi_futhark_new_data_async(
-            ctx->fut, task.domains[i].parts[rank].raw, task.domains[i].data.type.base,
-            task.domains[i].data.dim_cnt,
-            task.domains[i].parts[rank].dim_len[0],
-            task.domains[i].parts[rank].dim_len[1],
-            task.domains[i].parts[rank].dim_len[2]
+            ctx->fut, data->raw, data->type.base, data->dim_cnt,
+            data->dim_len[0], data->dim_len[1], data->dim_len[2]
         );
         if(gpu_data == NULL) {
             FMPI_RAISE_ERROR(ctx->err_handler, "FMPI",
                 "fmpi_futhark_new_data_async() failed!"
             );
         }
-        task.domains[i].parts[rank].gpu = gpu_data;
         task.args.in[i].gpu = gpu_data;
     }
     ctx->tasks[ctx->task_cnt++] = task;
-- 
GitLab