From 73eede77cd493e5973bd48a6cf24b3293fa24f1d Mon Sep 17 00:00:00 2001
From: "raphael.bach" <raphael.bach@etu.hesge.ch>
Date: Sun, 26 Jun 2022 00:35:59 +0200
Subject: [PATCH] Add `struct fmpi_partition`

---
Review fix-ups folded into src/fmpi_domain.c (hunk counts and diffstat updated
accordingly; commit and blob hashes are those of the original commit):
- `parts` is now allocated with calloc(): the `error:` path used to call free()
  on uninitialised `halo.raw` pointers.
- The halo copy derives its source range from the domain bounds: the
  right-boundary block used to read one element past the end of `data`, middle
  blocks never received their right ghost cell, and a single-process run read
  past the end as well.
- `fmpi_partition_block_1d()` is documented instead of `TODO`.

 include/fmpi_domain.h |  13 +++++-
 src/fmpi_domain.c     | 116 ++++++++++++++++++++++++++++++++++---------
 src/fmpi_task.c       |  26 ++++++------
 3 files changed, 114 insertions(+), 41 deletions(-)

diff --git a/include/fmpi_domain.h b/include/fmpi_domain.h
index 0b640d2..ddecd26 100644
--- a/include/fmpi_domain.h
+++ b/include/fmpi_domain.h
@@ -30,11 +30,19 @@
 #include <stddef.h> // size_t
 // Internal
 #include "fmpi_data.h"
+#include "fmpi_stencil.h"
 /*==============================================================================
     TYPE
 ==============================================================================*/
 // Forward declaration from `fmpi_ctx.h`
 struct fmpi_ctx;
+/*------------------------------------------------------------------------------
+    fmpi_partition
+------------------------------------------------------------------------------*/
+typedef struct fmpi_partition {
+    struct fmpi_data inner;
+    struct fmpi_data halo;
+} fmpi_partition;
 /*------------------------------------------------------------------------------
     fmpi_domain
 ------------------------------------------------------------------------------*/
@@ -43,7 +51,7 @@ struct fmpi_ctx;
  */
 typedef struct fmpi_domain {
     const struct fmpi_data * data; //!< Data composing the domain.
-    struct fmpi_data * parts; //!< Partitions of the domain after decomposition.
+    struct fmpi_partition * parts; //!< Partitions of the domain after decomposition.
     size_t part_cnt; //!< Number of partitions.
 } fmpi_domain;
 /*==============================================================================
@@ -53,7 +61,8 @@ typedef struct fmpi_domain {
     fmpi_new_domain()
 ------------------------------------------------------------------------------*/
 struct fmpi_domain fmpi_new_domain(
-    const struct fmpi_ctx * ctx, const struct fmpi_data * data
+    const struct fmpi_ctx * ctx, const struct fmpi_data * data,
+    struct fmpi_stencil stencil
 );
 /*==============================================================================
     GUARD
diff --git a/src/fmpi_domain.c b/src/fmpi_domain.c
index 63a8c21..98b4e24 100644
--- a/src/fmpi_domain.c
+++ b/src/fmpi_domain.c
@@ -29,6 +29,7 @@
 #include <string.h> // memcpy()
 // Internal
 #include "fmpi_data.h"
+#include "fmpi_stencil.h"
 #include "internal/fmpi_ctx.h"
 #include "internal/fmpi_error.h"
 #include "internal/fmpi_mpi.h"
@@ -41,8 +42,19 @@
 /**
- * TODO
+ * Splits `data` into `proc_cnt` contiguous 1D blocks and describes the block
+ * owned by the calling rank.
+ *
+ * The first `data->cnt % proc_cnt` blocks hold one more element than the
+ * others. Only the entry at index `ctx->mpi->rank` is filled in; every other
+ * entry is zeroed. Unless `stencil.type` is `FMPI_STENCIL_NONE`, that entry
+ * also owns a heap-allocated `halo` buffer: the inner block framed by one
+ * ghost cell per side, copied from the adjacent data or left at zero where
+ * the domain ends.
+ *
+ * @return Array of `proc_cnt` partitions, or `NULL` if an allocation failed.
  */
-static struct fmpi_data * fmpi_partition_block_1d(
-    const struct fmpi_ctx * ctx, const struct fmpi_data * data
+static struct fmpi_partition * fmpi_partition_block_1d(
+    const struct fmpi_ctx * ctx, const struct fmpi_data * data, size_t proc_cnt,
+    const struct fmpi_stencil stencil
 );
 /*==============================================================================
     PUBLIC FUNCTION DEFINITION
@@ -52,8 +64,9 @@ static struct fmpi_data * fmpi_partition_block_1d(
     fmpi_new_domain()
 ------------------------------------------------------------------------------*/
 struct fmpi_domain fmpi_new_domain(
-    const struct fmpi_ctx * const ctx, const struct fmpi_data * const data)
-{
+    const struct fmpi_ctx * const ctx, const struct fmpi_data * const data,
+    const struct fmpi_stencil stencil
+){
     assert(ctx != NULL);
     const size_t proc_cnt = (size_t)ctx->mpi->size;
     struct fmpi_domain domain = {
@@ -61,7 +74,7 @@ struct fmpi_domain fmpi_new_domain(
         .parts = NULL,
         .part_cnt = proc_cnt
     };
-    domain.parts = fmpi_partition_block_1d(ctx, data);
+    domain.parts = fmpi_partition_block_1d(ctx, data, proc_cnt, stencil);
     if(domain.parts == NULL) {
         FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "fmpi_partition_block_1d() failed!");
     }
@@ -73,33 +86,86 @@ struct fmpi_domain fmpi_new_domain(
 /*------------------------------------------------------------------------------
     fmpi_partition_block_1d()
 ------------------------------------------------------------------------------*/
-static struct fmpi_data * fmpi_partition_block_1d(
-    const struct fmpi_ctx * const ctx, const struct fmpi_data * const data
+static struct fmpi_partition * fmpi_partition_block_1d(
+    const struct fmpi_ctx * const ctx, const struct fmpi_data * const data,
+    const size_t proc_cnt, const struct fmpi_stencil stencil
 ) {
     assert(ctx != NULL);
     assert(data != NULL);
-    const size_t proc_cnt = (size_t)ctx->mpi->size;
-    struct fmpi_data * parts = malloc(proc_cnt * sizeof(*parts));
+    // Zeroed so that the entries of other ranks and every `halo.raw` start
+    // out as NULL/0: the `error` path below hands them to free().
+    struct fmpi_partition * parts = calloc(proc_cnt, sizeof(*parts));
     if(parts == NULL) {
-        FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "malloc(parts) failed!");
+        FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "calloc(parts) failed!");
         return NULL;
     }
-    const size_t cnt_per_proc = data->cnt/proc_cnt;
-    size_t rem = data->cnt % proc_cnt;
+    const struct fmpi_type data_type = data->type;
+    const size_t type_size = data_type.size;
+    const size_t data_cnt = data->cnt;
+    const size_t data_size = data_cnt * type_size;
+    //! @todo Handle proc_cnt > data_cnt
+    const size_t cnt_per_proc = data_cnt/proc_cnt;
+    size_t rem = data_cnt % proc_cnt;
     size_t offset = 0;
+    const size_t rank = (size_t)ctx->mpi->rank;
     for(size_t i = 0; i < proc_cnt; i++) {
-        const size_t cnt = rem != 0 ? cnt_per_proc + 1 : cnt_per_proc;
-        rem = rem != 0 ? rem - 1 : rem;
-        const size_t size = cnt * data->type.size;
-        parts[i] = (struct fmpi_data){ \
-            .type = data->type,
-            .cnt = cnt,
-            .size = size,
-            .dim_len = {cnt, 1, 1},
-            .dim_cnt = 1,
-            .raw = (char *)data->raw + offset
-        };
-        offset += size;
+        const size_t inner_cnt = (rem != 0) ? (cnt_per_proc + 1) : cnt_per_proc;
+        const size_t inner_size = inner_cnt * type_size;
+        if(i == rank) {
+            // Inner
+            struct fmpi_data inner = {
+                .type = data_type,
+                .cnt = inner_cnt,
+                .size = inner_size,
+                .dim_len = {inner_cnt, 1, 1},
+                .dim_cnt = 1,
+                .raw = (char *)data->raw + offset,
+                .gpu = NULL
+            };
+            parts[i].inner = inner;
+            // Halo
+            if(stencil.type != FMPI_STENCIL_NONE) {
+                const size_t halo_cnt = inner_cnt + 2;
+                const size_t halo_size = halo_cnt * type_size;
+                struct fmpi_data halo = {
+                    .type = data_type,
+                    .cnt = halo_cnt,
+                    .size = halo_size,
+                    .dim_len = {halo_cnt, 1, 1},
+                    .dim_cnt = 1,
+                    .raw = calloc(halo_cnt, type_size),
+                    .gpu = NULL
+                };
+                if(halo.raw == NULL) {
+                    FMPI_RAISE_ERROR(ctx->err_handler, "FMPI",
+                        "calloc(halo.raw) failed!"
+                    );
+                    goto error;
+                }
+                // A neighbour cell exists only where the domain extends past
+                // the inner block; a missing one keeps the zero from calloc().
+                const size_t left = (offset != 0) ? 1 : 0;
+                const size_t right = (offset + inner_size < data_size) ? 1 : 0;
+                const size_t copy_size = (left + inner_cnt + right) * type_size;
+                // Guarded: memcpy() requires valid pointers even for 0 bytes.
+                if(copy_size != 0) {
+                    memcpy(
+                        (char *)halo.raw + (1 - left) * type_size,
+                        (const char *)inner.raw - left * type_size,
+                        copy_size
+                    );
+                }
+                parts[i].halo = halo;
+            }
+        }
+        offset += inner_size;
+        rem = (rem != 0) ? (rem - 1) : rem;
     }
     return parts;
+error:
+    for(size_t i = 0; i < proc_cnt; i++) {
+        free(parts[i].halo.raw);
+    }
+    free(parts);
+    return NULL;
 }
diff --git a/src/fmpi_task.c b/src/fmpi_task.c
index 11477f8..0a71d3c 100644
--- a/src/fmpi_task.c
+++ b/src/fmpi_task.c
@@ -60,25 +60,24 @@ struct fmpi_task fmpi_task_register_sync(
     };
     const size_t rank = (size_t)ctx->mpi->rank;
     for(size_t i = 0; i < task.args.cnt; i++) {
-        task.domains[i] = fmpi_new_domain(ctx, args->in[i]);
+        task.domains[i] = fmpi_new_domain(ctx, &args->in[i], stencil);
         if(task.domains[i].parts == NULL) {
             FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "fmpi_new_domain() failed!");
             continue;
         }
+        const struct fmpi_data * const data = (stencil.type != FMPI_STENCIL_NONE)
+            ? &task.domains[i].parts[rank].halo
+            : &task.domains[i].parts[rank].inner;
         //! @todo Could fmpi_futhark_new_data_async() be called here instead?
         void * gpu_data = fmpi_futhark_new_data_sync(
-            ctx->fut, task.domains[i].parts[rank].raw, task.domains[i].data.type.base,
-            task.domains[i].data.dim_cnt,
-            task.domains[i].parts[rank].dim_len[0],
-            task.domains[i].parts[rank].dim_len[1],
-            task.domains[i].parts[rank].dim_len[2]
+            ctx->fut, data->raw, data->type.base, data->dim_cnt,
+            data->dim_len[0], data->dim_len[1], data->dim_len[2]
         );
         if(gpu_data == NULL) {
             FMPI_RAISE_ERROR(ctx->err_handler, "FMPI",
                 "fmpi_futhark_new_data_sync() failed!"
             );
         }
-        task.domains[i].parts[rank].gpu = gpu_data;
         task.args.in[i].gpu = gpu_data;
     }
     ctx->tasks[ctx->task_cnt++] = task;
@@ -106,24 +105,23 @@ struct fmpi_task fmpi_task_register_async(
     };
     const size_t rank = (size_t)ctx->mpi->rank;
     for(size_t i = 0; i < task.args.cnt; i++) {
-        task.domains[i] = fmpi_new_domain(ctx, args->in[i]);
+        task.domains[i] = fmpi_new_domain(ctx, &args->in[i], stencil);
         if(task.domains[i].parts == NULL) {
             FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "fmpi_new_domain() failed!");
             continue;
         }
+        const struct fmpi_data * const data = (stencil.type != FMPI_STENCIL_NONE)
+            ? &task.domains[i].parts[rank].halo
+            : &task.domains[i].parts[rank].inner;
         void * gpu_data = fmpi_futhark_new_data_async(
-            ctx->fut, task.domains[i].parts[rank].raw, task.domains[i].data.type.base,
-            task.domains[i].data.dim_cnt,
-            task.domains[i].parts[rank].dim_len[0],
-            task.domains[i].parts[rank].dim_len[1],
-            task.domains[i].parts[rank].dim_len[2]
+            ctx->fut, data->raw, data->type.base, data->dim_cnt,
+            data->dim_len[0], data->dim_len[1], data->dim_len[2]
         );
         if(gpu_data == NULL) {
             FMPI_RAISE_ERROR(ctx->err_handler, "FMPI",
                 "fmpi_futhark_new_data_async() failed!"
             );
         }
-        task.domains[i].parts[rank].gpu = gpu_data;
         task.args.in[i].gpu = gpu_data;
     }
     ctx->tasks[ctx->task_cnt++] = task;
-- 
GitLab