diff --git a/include/fmpi_domain.h b/include/fmpi_domain.h
index ddecd26ff480282b0464243aaf9c11f8d429016e..41c8c61bc38f2746dd9eaac1a331edff9cf093ca 100644
--- a/include/fmpi_domain.h
+++ b/include/fmpi_domain.h
@@ -36,13 +36,6 @@
 ==============================================================================*/
 // Forward declaration from `fmpi_ctx.h`
 struct fmpi_ctx;
-/*------------------------------------------------------------------------------
-    fmpi_partition
-------------------------------------------------------------------------------*/
-typedef struct fmpi_partition {
-    struct fmpi_data inner;
-    struct fmpi_data halo;
-} fmpi_partition;
 /*------------------------------------------------------------------------------
     fmpi_domain
 ------------------------------------------------------------------------------*/
@@ -51,7 +44,8 @@ typedef struct fmpi_partition {
  */
 typedef struct fmpi_domain {
     const struct fmpi_data * data; //!< Data composing the domain.
-    struct fmpi_partition * parts; //!< Partitions of the domain after decomposition.
+    struct fmpi_data inner; //!< Block of `data` assigned to the calling rank.
+    struct fmpi_data halo; //!< `inner` plus ghost cells; only set when a stencil is used.
     size_t part_cnt; //!< Number of partitions.
 } fmpi_domain;
 /*==============================================================================
diff --git a/src/fmpi_domain.c b/src/fmpi_domain.c
index 98b4e24beb2c0e85ee99e7e2b9d9e293a85370e1..e44e914c5e85ac06a5798af134260da2d844f7a6 100644
--- a/src/fmpi_domain.c
+++ b/src/fmpi_domain.c
@@ -27,6 +27,7 @@
 #include <assert.h>
 #include <stdlib.h> // size_t, NULL, malloc(), free()
 #include <string.h> // memcpy()
+#include <stdio.h>
 // Internal
 #include "fmpi_data.h"
 #include "fmpi_stencil.h"
@@ -42,9 +43,15 @@
 /**
  * TODO
  */
-static struct fmpi_partition * fmpi_partition_block_1d(
-    const struct fmpi_ctx * ctx, const struct fmpi_data * data, size_t proc_cnt,
-    const struct fmpi_stencil stencil
+static struct fmpi_data fmpi_partition_block_1d(
+    const struct fmpi_ctx * ctx, const struct fmpi_data * data
+);
+/*------------------------------------------------------------------------------
+    fmpi_halo_1d()
+------------------------------------------------------------------------------*/
+static struct fmpi_data fmpi_halo_1d(
+    const struct fmpi_ctx * ctx, struct fmpi_data data,
+    struct fmpi_stencil stencil
 );
 /*==============================================================================
     PUBLIC FUNCTION DEFINITION
@@ -61,12 +68,11 @@ struct fmpi_domain fmpi_new_domain(
     const size_t proc_cnt = (size_t)ctx->mpi->size;
     struct fmpi_domain domain = {
         .data = data,
-        .parts = NULL,
         .part_cnt = proc_cnt
     };
-    domain.parts = fmpi_partition_block_1d(ctx, data, proc_cnt, stencil);
-    if(domain.parts == NULL) {
-        FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "fmpi_partition_block_1d() failed!");
+    domain.inner = fmpi_partition_block_1d(ctx, data);
+    if(stencil.type != FMPI_STENCIL_NONE) {
+        domain.halo = fmpi_halo_1d(ctx, domain.inner, stencil);
     }
     return domain;
 }
@@ -76,80 +82,76 @@ struct fmpi_domain fmpi_new_domain(
 /*------------------------------------------------------------------------------
     fmpi_partition_block_1d()
 ------------------------------------------------------------------------------*/
-static struct fmpi_partition * fmpi_partition_block_1d(
-    const struct fmpi_ctx * const ctx, const struct fmpi_data * const data,
-    const size_t proc_cnt, const struct fmpi_stencil stencil
+static struct fmpi_data fmpi_partition_block_1d(
+    const struct fmpi_ctx * const ctx, const struct fmpi_data * const data
 ) {
     assert(ctx != NULL);
     assert(data != NULL);
-    struct fmpi_partition * parts = malloc(proc_cnt * sizeof(*parts));
-    if(parts == NULL) {
-        FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "malloc(parts) failed!");
-        return NULL;
-    }
-    const struct fmpi_type data_type = data->type;
-    const size_t type_size = data_type.size;
-    const size_t data_cnt = data->cnt;
-    //! @todo Handle proc_cnt > data_cnt
-    const size_t cnt_per_proc = data_cnt/proc_cnt;
-    size_t rem = data_cnt % proc_cnt;
-    size_t offset = 0;
+
     const size_t rank = (size_t)ctx->mpi->rank;
-    for(size_t i = 0; i < proc_cnt; i++) {
-        const size_t inner_cnt = (rem != 0) ? (cnt_per_proc + 1) : cnt_per_proc;
-        const size_t inner_size = inner_cnt * type_size;
-        if(i == rank) {
-            // Inner
-            struct fmpi_data inner = {
-                .type = data_type,
-                .cnt = inner_cnt,
-                .size = inner_size,
-                .dim_len = {inner_cnt, 1, 1},
-                .dim_cnt = 1,
-                .raw = (char *)data->raw + offset,
-                .gpu = NULL
-            };
-            parts[i].inner = inner;
-            // Halo
-            if(stencil.type != FMPI_STENCIL_NONE) {
-                const size_t halo_cnt = inner_cnt + 2;
-                const size_t halo_size = halo_cnt * type_size;
-                struct fmpi_data halo = {
-                    .type = data_type,
-                    .cnt = halo_cnt,
-                    .size = halo_size,
-                    .dim_len = {halo_cnt, 1, 1},
-                    .dim_cnt = 1,
-                    .raw = calloc(halo_cnt, type_size),
-                    .gpu = NULL
-                };
-                if(halo.raw == NULL) {
-                    FMPI_RAISE_ERROR(ctx->err_handler, "FMPI",
-                        "calloc(halo.raw) failed!"
-                    );
-                    goto error;
-                }
-                // Left boundary
-                if(i == 0) {
-                    memcpy((char*)halo.raw+type_size, inner.raw, inner_size+type_size);
-                // Right boundary
-                } else if(i == proc_cnt-1) {
-                    memcpy(halo.raw, (char*)inner.raw-type_size, halo_size);
-                // Middle
-                } else {
-                    memcpy(halo.raw, (char*)inner.raw-type_size, inner_size+type_size);
-                }
-                parts[i].halo = halo;
-            }
-        }
-        offset += inner_size;
-        rem = (rem != 0) ? (rem - 1) : rem;
+    const size_t proc_cnt = (size_t)ctx->mpi->size;
+    const size_t cnt_per_proc = data->cnt / proc_cnt;
+    const size_t rem = data->cnt % proc_cnt;
+    // The first `rem` ranks own one extra element each.
+    const size_t cnt = (rank < rem) ? (cnt_per_proc + 1) : cnt_per_proc;
+    const size_t size = cnt * data->type.size;
+    // Element index of this rank's block: `cnt_per_proc` elements per
+    // preceding rank, plus one extra for each preceding rank below `rem`.
+    const size_t first = (rank * cnt_per_proc) + ((rank < rem) ? rank : rem);
+    const size_t offset = first * data->type.size; // in bytes
+    return (struct fmpi_data){
+        .type = data->type,
+        .cnt = cnt,
+        .size = size,
+        .dim_len = {cnt, 1, 1},
+        .dim_cnt = 1,
+        .raw = (char *)data->raw + offset,
+        .gpu = NULL
+    };
+}
+/*------------------------------------------------------------------------------
+    fmpi_halo_1d()
+------------------------------------------------------------------------------*/
+static struct fmpi_data fmpi_halo_1d(
+    const struct fmpi_ctx * const ctx, const struct fmpi_data data,
+    const struct fmpi_stencil stencil
+){
+    assert(ctx != NULL);
+
+    const size_t cnt = data.cnt + (2 * stencil.length);
+    const size_t type_size = data.type.size;
+    const size_t size = cnt * type_size;
+    struct fmpi_data halo = {
+        .type = data.type,
+        .cnt = cnt,
+        .size = size,
+        .dim_len = {cnt, 1, 1},
+        .dim_cnt = 1,
+        .raw = calloc(cnt, type_size),
+        .gpu = NULL
+    };
+    if(halo.raw == NULL) {
+        FMPI_RAISE_ERROR(ctx->err_handler, "FMPI",
+            "calloc(halo.raw) failed!"
+        );
+        // Nothing to copy into; callers detect the failure through `raw`.
+        return halo;
     }
-    return parts;
-error:
-    for(size_t i = 0; i < proc_cnt; i++) {
-        free(parts[i].halo.raw);
-    }
-    free(parts);
-    return NULL;
+    const size_t rank = (size_t)ctx->mpi->rank;
+    const size_t last = (size_t)(ctx->mpi->size - 1);
+    // Width in bytes of one ghost region; matches the `2 * stencil.length`
+    // extra elements allocated above.
+    const size_t ghost = stencil.length * type_size;
+    // Ghost cells are only read on sides where a neighbouring rank exists;
+    // the outer side of a boundary rank keeps the zeros written by calloc().
+    const size_t left = (rank != 0) ? ghost : 0;
+    const size_t right = (rank != last) ? ghost : 0;
+    // NOTE(review): assumes each neighbouring block holds at least
+    // `stencil.length` elements -- confirm for small domains.
+    memcpy(
+        (char *)halo.raw + (ghost - left),
+        (const char *)data.raw - left,
+        left + data.size + right
+    );
+    return halo;
 }
diff --git a/src/fmpi_task.c b/src/fmpi_task.c
index 8b482f4fb87fae383364183b7d65f5fc7edc6fb8..f7b6503b7769d2f0585ce9e8cafb7f3dfd7b2f59 100644
--- a/src/fmpi_task.c
+++ b/src/fmpi_task.c
@@ -58,16 +58,16 @@ struct fmpi_task fmpi_task_register_sync(
         .args = *args,
         .stencil = stencil
     };
-    const size_t rank = (size_t)ctx->mpi->rank;
     for(size_t i = 0; i < task.args.cnt; i++) {
         task.domains[i] = fmpi_new_domain(ctx, &args->in[i], stencil);
-        if(task.domains[i].parts == NULL) {
-            FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "fmpi_new_domain() failed!");
-            continue;
-        }
         const struct fmpi_data * const data = (stencil.type != FMPI_STENCIL_NONE)
-            ? &task.domains[i].parts[rank].halo
-            : &task.domains[i].parts[rank].inner;
+            ? &task.domains[i].halo
+            : &task.domains[i].inner;
+        // A NULL buffer means the domain could not be built (e.g. halo allocation failed).
+        if(data->raw == NULL) {
+            FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "fmpi_new_domain() failed!");
+            continue;
+        }
         //! @todo Could fmpi_futhark_new_data_async() be called here instead?
         void * gpu_data = fmpi_futhark_new_data_sync(
             ctx->fut, data->raw, data->type.base, data->dim_cnt,
@@ -103,16 +103,16 @@ struct fmpi_task fmpi_task_register_async(
         .args = *args,
         .stencil = stencil
     };
-    const size_t rank = (size_t)ctx->mpi->rank;
     for(size_t i = 0; i < task.args.cnt; i++) {
         task.domains[i] = fmpi_new_domain(ctx, &args->in[i], stencil);
-        if(task.domains[i].parts == NULL) {
-            FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "fmpi_new_domain() failed!");
-            continue;
-        }
         const struct fmpi_data * const data = (stencil.type != FMPI_STENCIL_NONE)
-            ? &task.domains[i].parts[rank].halo
-            : &task.domains[i].parts[rank].inner;
+            ? &task.domains[i].halo
+            : &task.domains[i].inner;
+        // A NULL buffer means the domain could not be built (e.g. halo allocation failed).
+        if(data->raw == NULL) {
+            FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "fmpi_new_domain() failed!");
+            continue;
+        }
         void * gpu_data = fmpi_futhark_new_data_async(
             ctx->fut, data->raw, data->type.base, data->dim_cnt,
             data->dim_len[0], data->dim_len[1], data->dim_len[2]
@@ -135,6 +135,7 @@ int fmpi_task_run_sync(
 ){
     assert(ctx != NULL);
     assert(task != NULL);
+
     const int err_id = task->func(ctx, &task->args);
     fmpi_futhark_sync(ctx->fut);
     fmpi_futhark_check_error(ctx->fut, "task->func");
@@ -195,7 +196,7 @@ int fmpi_task_finalize(
         return err;
     }
     if(op == FMPI_TASK_OP_GATHER) {
-        const size_t cnt = task->domains[0].parts[ctx->mpi->rank].inner.cnt;
+        const size_t cnt = task->domains[0].inner.cnt;
         MPI_Datatype type = fmpi_mpi_type(task->args.out.type.base);
         const int err = fmpi_mpi_world_gather_in_place(
             ctx->mpi, task->args.out.raw, type, cnt, cnt