diff --git a/src/fmpi_domain.c b/src/fmpi_domain.c index 07613670045dbb5e4260b0b87c853789709428c0..8e70be51a43d229cbf61191ab93a0bd0e8c4f57b 100644 --- a/src/fmpi_domain.c +++ b/src/fmpi_domain.c @@ -58,6 +58,13 @@ static struct fmpi_data fmpi_halo_1d( const struct fmpi_ctx * ctx, struct fmpi_data data, struct fmpi_stencil stencil ); +/*------------------------------------------------------------------------------ + fmpi_halo_2d() +------------------------------------------------------------------------------*/ +static struct fmpi_data fmpi_halo_2d( + const struct fmpi_ctx * ctx, struct fmpi_data data, + struct fmpi_stencil stencil +); /*============================================================================== PUBLIC FUNCTION DEFINITION ==============================================================================*/ @@ -226,3 +233,62 @@ static struct fmpi_data fmpi_halo_1d( } return halo; } +/*------------------------------------------------------------------------------ + fmpi_halo_2d() +------------------------------------------------------------------------------*/ +static struct fmpi_data fmpi_halo_2d( + const struct fmpi_ctx * const ctx, const struct fmpi_data data, + const struct fmpi_stencil stencil +){ + assert(ctx != NULL); + + const size_t dim_len_x = data.dim_len[0] + (2 * stencil.length); + const size_t dim_len_y = data.dim_len[1] + (2 * stencil.length); + const size_t cnt = dim_len_x * dim_len_y; + const size_t type_size = data.type.size; + const size_t size = cnt * type_size; + struct fmpi_data halo = { + .type = data.type, + .cnt = cnt, + .size = size, + .dim_len = {dim_len_x, dim_len_y, 1}, + .dim_cnt = 2, + .raw = calloc(cnt, type_size), + .gpu = NULL + }; + if(halo.raw == NULL) { + FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", + "calloc(halo.raw) failed!" + ); + } + const size_t rank = (size_t)ctx->mpi->rank; + // First rank + if(rank == 0) { + for(size_t i = 0; i < data.dim_len[1] + stencil.length; i++) { + const size_t dest_offset = ((i + stencil.length) * dim_len_x + stencil.length) * type_size; + void * const dest = (char *)halo.raw + dest_offset; + const size_t len = data.dim_len[0] * type_size; + const void * const src = (char *)data.raw + (i * len); + memcpy(dest, src, len); + } + // Last rank + } else if(rank == (size_t)(ctx->mpi->size - 1)) { + for(size_t i = 0; i < data.dim_len[1] + stencil.length; i++) { + const size_t dest_offset = (i * dim_len_x + stencil.length) * type_size; + void * const dest = (char *)halo.raw + dest_offset; + const size_t len = data.dim_len[0] * type_size; + const void * const src = (char *)data.raw - len + (i * len); + memcpy(dest, src, len); + } + // Middle rank + } else { + for(size_t i = 0; i < data.dim_len[1] + (2 * stencil.length); i++) { + const size_t dest_offset = (i * dim_len_x + stencil.length) * type_size; + void * const dest = (char *)halo.raw + dest_offset; + const size_t len = data.dim_len[0] * type_size; + const void * const src = (char *)data.raw - len + (i * len); + memcpy(dest, src, len); + } + } + return halo; +}