Skip to content
Snippets Groups Projects
Verified Commit bf0f069c authored by raphael.bach's avatar raphael.bach
Browse files

Remove `fmpi_partition`

parent 35484f46
No related branches found
No related tags found
No related merge requests found
...@@ -36,13 +36,6 @@ ...@@ -36,13 +36,6 @@
==============================================================================*/ ==============================================================================*/
// Forward declaration from `fmpi_ctx.h` // Forward declaration from `fmpi_ctx.h`
struct fmpi_ctx; struct fmpi_ctx;
/*------------------------------------------------------------------------------
fmpi_partition
------------------------------------------------------------------------------*/
typedef struct fmpi_partition {
struct fmpi_data inner;
struct fmpi_data halo;
} fmpi_partition;
/*------------------------------------------------------------------------------ /*------------------------------------------------------------------------------
fmpi_domain fmpi_domain
------------------------------------------------------------------------------*/ ------------------------------------------------------------------------------*/
...@@ -51,7 +44,8 @@ typedef struct fmpi_partition { ...@@ -51,7 +44,8 @@ typedef struct fmpi_partition {
*/ */
typedef struct fmpi_domain { typedef struct fmpi_domain {
const struct fmpi_data * data; //!< Data composing the domain. const struct fmpi_data * data; //!< Data composing the domain.
struct fmpi_partition * parts; //!< Partitions of the domain after decomposition. struct fmpi_data inner;
struct fmpi_data halo;
size_t part_cnt; //!< Number of partitions. size_t part_cnt; //!< Number of partitions.
} fmpi_domain; } fmpi_domain;
/*============================================================================== /*==============================================================================
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include <assert.h> #include <assert.h>
#include <stdlib.h> // size_t, NULL, malloc(), free() #include <stdlib.h> // size_t, NULL, malloc(), free()
#include <string.h> // memcpy() #include <string.h> // memcpy()
#include <stdio.h>
// Internal // Internal
#include "fmpi_data.h" #include "fmpi_data.h"
#include "fmpi_stencil.h" #include "fmpi_stencil.h"
...@@ -42,9 +43,15 @@ ...@@ -42,9 +43,15 @@
/** /**
* TODO * TODO
*/ */
static struct fmpi_partition * fmpi_partition_block_1d( static struct fmpi_data fmpi_partition_block_1d(
const struct fmpi_ctx * ctx, const struct fmpi_data * data, size_t proc_cnt, const struct fmpi_ctx * ctx, const struct fmpi_data * data
const struct fmpi_stencil stencil );
/*------------------------------------------------------------------------------
fmpi_halo_1d()
------------------------------------------------------------------------------*/
static struct fmpi_data fmpi_halo_1d(
const struct fmpi_ctx * ctx, struct fmpi_data data,
struct fmpi_stencil stencil
); );
/*============================================================================== /*==============================================================================
PUBLIC FUNCTION DEFINITION PUBLIC FUNCTION DEFINITION
...@@ -61,12 +68,11 @@ struct fmpi_domain fmpi_new_domain( ...@@ -61,12 +68,11 @@ struct fmpi_domain fmpi_new_domain(
const size_t proc_cnt = (size_t)ctx->mpi->size; const size_t proc_cnt = (size_t)ctx->mpi->size;
struct fmpi_domain domain = { struct fmpi_domain domain = {
.data = data, .data = data,
.parts = NULL,
.part_cnt = proc_cnt .part_cnt = proc_cnt
}; };
domain.parts = fmpi_partition_block_1d(ctx, data, proc_cnt, stencil); domain.inner = fmpi_partition_block_1d(ctx, data);
if(domain.parts == NULL) { if(stencil.type != FMPI_STENCIL_NONE) {
FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "fmpi_partition_block_1d() failed!"); domain.halo = fmpi_halo_1d(ctx, domain.inner, stencil);
} }
return domain; return domain;
} }
...@@ -76,80 +82,64 @@ struct fmpi_domain fmpi_new_domain( ...@@ -76,80 +82,64 @@ struct fmpi_domain fmpi_new_domain(
/*------------------------------------------------------------------------------ /*------------------------------------------------------------------------------
fmpi_partition_block_1d() fmpi_partition_block_1d()
------------------------------------------------------------------------------*/ ------------------------------------------------------------------------------*/
static struct fmpi_partition * fmpi_partition_block_1d( static struct fmpi_data fmpi_partition_block_1d(
const struct fmpi_ctx * const ctx, const struct fmpi_data * const data, const struct fmpi_ctx * const ctx, const struct fmpi_data * const data
const size_t proc_cnt, const struct fmpi_stencil stencil
) { ) {
assert(ctx != NULL); assert(ctx != NULL);
assert(data != NULL); assert(data != NULL);
struct fmpi_partition * parts = malloc(proc_cnt * sizeof(*parts));
if(parts == NULL) {
FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "malloc(parts) failed!");
return NULL;
}
const struct fmpi_type data_type = data->type;
const size_t type_size = data_type.size;
const size_t data_cnt = data->cnt;
//! @todo Handle proc_cnt > data_cnt
const size_t cnt_per_proc = data_cnt/proc_cnt;
size_t rem = data_cnt % proc_cnt;
size_t offset = 0;
const size_t rank = (size_t)ctx->mpi->rank; const size_t rank = (size_t)ctx->mpi->rank;
for(size_t i = 0; i < proc_cnt; i++) { const size_t cnt_per_proc = data->cnt/(size_t)ctx->mpi->size;
const size_t inner_cnt = (rem != 0) ? (cnt_per_proc + 1) : cnt_per_proc; const size_t rem = data->cnt % (size_t)ctx->mpi->size;
const size_t inner_size = inner_cnt * type_size; const size_t cnt = (rank < rem) ? (cnt_per_proc + 1) : cnt_per_proc;
if(i == rank) { const size_t size = cnt * data->type.size;
// Inner const size_t offset = (rank * size) + rem;
struct fmpi_data inner = { return (struct fmpi_data){
.type = data_type, .type = data->type,
.cnt = inner_cnt, .cnt = cnt,
.size = inner_size, .size = size,
.dim_len = {inner_cnt, 1, 1}, .dim_len = {cnt, 1, 1},
.dim_cnt = 1, .dim_cnt = 1,
.raw = (char *)data->raw + offset, .raw = (char *)data->raw + offset,
.gpu = NULL .gpu = NULL
}; };
parts[i].inner = inner; }
// Halo /*------------------------------------------------------------------------------
if(stencil.type != FMPI_STENCIL_NONE) { fmpi_create_halo()
const size_t halo_cnt = inner_cnt + 2; ------------------------------------------------------------------------------*/
const size_t halo_size = halo_cnt * type_size; static struct fmpi_data fmpi_halo_1d(
const struct fmpi_ctx * const ctx, const struct fmpi_data data,
const struct fmpi_stencil stencil
){
assert(ctx != NULL);
const size_t cnt = data.cnt + (2 * stencil.length);
const size_t type_size = data.type.size;
const size_t size = cnt * type_size;
struct fmpi_data halo = { struct fmpi_data halo = {
.type = data_type, .type = data.type,
.cnt = halo_cnt, .cnt = cnt,
.size = halo_size, .size = size,
.dim_len = {halo_cnt, 1, 1}, .dim_len = {cnt, 1, 1},
.dim_cnt = 1, .dim_cnt = 1,
.raw = calloc(halo_cnt, type_size), .raw = calloc(cnt, type_size),
.gpu = NULL .gpu = NULL
}; };
if(halo.raw == NULL) { if(halo.raw == NULL) {
FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", FMPI_RAISE_ERROR(ctx->err_handler, "FMPI",
"calloc(halo.raw) failed!" "calloc(halo.raw) failed!"
); );
goto error;
} }
const size_t rank = (size_t)ctx->mpi->rank;
// Left boundary // Left boundary
if(i == 0) { if(rank == 0) {
memcpy((char*)halo.raw+type_size, inner.raw, inner_size+type_size); memcpy(((char *)halo.raw + type_size), data.raw, (data.size + type_size));
// Right boundary // Right boundary
} else if(i == proc_cnt-1) { } else if(rank == (size_t)(ctx->mpi->size - 1)) {
memcpy(halo.raw, (char*)inner.raw-type_size, halo_size); memcpy(halo.raw, ((const char *)(data.raw) - type_size), size);
// Middle // Middle
} else { } else {
memcpy(halo.raw, (char*)inner.raw-type_size, inner_size+type_size); memcpy(halo.raw, ((const char *)(data.raw) - type_size), (data.size + type_size));
}
parts[i].halo = halo;
}
}
offset += inner_size;
rem = (rem != 0) ? (rem - 1) : rem;
}
return parts;
error:
for(size_t i = 0; i < proc_cnt; i++) {
free(parts[i].halo.raw);
} }
free(parts); return halo;
return NULL;
} }
...@@ -58,16 +58,11 @@ struct fmpi_task fmpi_task_register_sync( ...@@ -58,16 +58,11 @@ struct fmpi_task fmpi_task_register_sync(
.args = *args, .args = *args,
.stencil = stencil .stencil = stencil
}; };
const size_t rank = (size_t)ctx->mpi->rank;
for(size_t i = 0; i < task.args.cnt; i++) { for(size_t i = 0; i < task.args.cnt; i++) {
task.domains[i] = fmpi_new_domain(ctx, &args->in[i], stencil); task.domains[i] = fmpi_new_domain(ctx, &args->in[i], stencil);
if(task.domains[i].parts == NULL) {
FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "fmpi_new_domain() failed!");
continue;
}
const struct fmpi_data * const data = (stencil.type != FMPI_STENCIL_NONE) const struct fmpi_data * const data = (stencil.type != FMPI_STENCIL_NONE)
? &task.domains[i].parts[rank].halo ? &task.domains[i].halo
: &task.domains[i].parts[rank].inner; : &task.domains[i].inner;
//! @todo Could fmpi_futhark_new_data_async() be called here instead? //! @todo Could fmpi_futhark_new_data_async() be called here instead?
void * gpu_data = fmpi_futhark_new_data_sync( void * gpu_data = fmpi_futhark_new_data_sync(
ctx->fut, data->raw, data->type.base, data->dim_cnt, ctx->fut, data->raw, data->type.base, data->dim_cnt,
...@@ -103,16 +98,11 @@ struct fmpi_task fmpi_task_register_async( ...@@ -103,16 +98,11 @@ struct fmpi_task fmpi_task_register_async(
.args = *args, .args = *args,
.stencil = stencil .stencil = stencil
}; };
const size_t rank = (size_t)ctx->mpi->rank;
for(size_t i = 0; i < task.args.cnt; i++) { for(size_t i = 0; i < task.args.cnt; i++) {
task.domains[i] = fmpi_new_domain(ctx, &args->in[i], stencil); task.domains[i] = fmpi_new_domain(ctx, &args->in[i], stencil);
if(task.domains[i].parts == NULL) {
FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "fmpi_new_domain() failed!");
continue;
}
const struct fmpi_data * const data = (stencil.type != FMPI_STENCIL_NONE) const struct fmpi_data * const data = (stencil.type != FMPI_STENCIL_NONE)
? &task.domains[i].parts[rank].halo ? &task.domains[i].halo
: &task.domains[i].parts[rank].inner; : &task.domains[i].inner;
void * gpu_data = fmpi_futhark_new_data_async( void * gpu_data = fmpi_futhark_new_data_async(
ctx->fut, data->raw, data->type.base, data->dim_cnt, ctx->fut, data->raw, data->type.base, data->dim_cnt,
data->dim_len[0], data->dim_len[1], data->dim_len[2] data->dim_len[0], data->dim_len[1], data->dim_len[2]
...@@ -135,6 +125,7 @@ int fmpi_task_run_sync( ...@@ -135,6 +125,7 @@ int fmpi_task_run_sync(
){ ){
assert(ctx != NULL); assert(ctx != NULL);
assert(task != NULL); assert(task != NULL);
const int err_id = task->func(ctx, &task->args); const int err_id = task->func(ctx, &task->args);
fmpi_futhark_sync(ctx->fut); fmpi_futhark_sync(ctx->fut);
fmpi_futhark_check_error(ctx->fut, "task->func"); fmpi_futhark_check_error(ctx->fut, "task->func");
...@@ -195,7 +186,7 @@ int fmpi_task_finalize( ...@@ -195,7 +186,7 @@ int fmpi_task_finalize(
return err; return err;
} }
if(op == FMPI_TASK_OP_GATHER) { if(op == FMPI_TASK_OP_GATHER) {
const size_t cnt = task->domains[0].parts[ctx->mpi->rank].inner.cnt; const size_t cnt = task->domains[0].inner.cnt;
MPI_Datatype type = fmpi_mpi_type(task->args.out.type.base); MPI_Datatype type = fmpi_mpi_type(task->args.out.type.base);
const int err = fmpi_mpi_world_gather_in_place( const int err = fmpi_mpi_world_gather_in_place(
ctx->mpi, task->args.out.raw, type, cnt, cnt ctx->mpi, task->args.out.raw, type, cnt, cnt
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment