Skip to content
Snippets Groups Projects
Verified Commit bf0f069c authored by raphael.bach's avatar raphael.bach
Browse files

Remove `fmpi_partition`

parent 35484f46
No related branches found
No related tags found
No related merge requests found
......@@ -36,13 +36,6 @@
==============================================================================*/
// Forward declaration from `fmpi_ctx.h`
struct fmpi_ctx;
/*------------------------------------------------------------------------------
fmpi_partition
------------------------------------------------------------------------------*/
/**
 * One process' share of a decomposed domain.
 */
typedef struct fmpi_partition {
struct fmpi_data inner; //!< Block of the domain data assigned to the process; a view into the original buffer.
struct fmpi_data halo; //!< Separately allocated copy of `inner` padded with boundary elements; only filled in when a stencil is used.
} fmpi_partition;
/*------------------------------------------------------------------------------
fmpi_domain
------------------------------------------------------------------------------*/
......@@ -51,7 +44,8 @@ typedef struct fmpi_partition {
*/
typedef struct fmpi_domain {
const struct fmpi_data * data; //!< Data composing the domain.
struct fmpi_partition * parts; //!< Partitions of the domain after decomposition.
struct fmpi_data inner; //!< Block of `data` assigned to the calling process by the 1D block decomposition.
struct fmpi_data halo; //!< Halo-extended copy of `inner`; only set when the stencil type is not FMPI_STENCIL_NONE.
size_t part_cnt; //!< Number of partitions.
} fmpi_domain;
/*==============================================================================
......
......@@ -27,6 +27,7 @@
#include <assert.h>
#include <stdlib.h> // size_t, NULL, malloc(), free()
#include <string.h> // memcpy()
#include <stdio.h>
// Internal
#include "fmpi_data.h"
#include "fmpi_stencil.h"
......@@ -42,9 +43,15 @@
/**
 * Computes the 1D block decomposition of `data` for the calling process.
 *
 * The elements of `data` are split into contiguous blocks, one per MPI
 * process; the rank and process count are read from `ctx->mpi`.
 *
 * @param ctx  Context providing the MPI rank and process count.
 * @param data Data to decompose.
 */
static struct fmpi_partition * fmpi_partition_block_1d(
const struct fmpi_ctx * ctx, const struct fmpi_data * data, size_t proc_cnt,
const struct fmpi_stencil stencil
static struct fmpi_data fmpi_partition_block_1d(
const struct fmpi_ctx * ctx, const struct fmpi_data * data
);
/*------------------------------------------------------------------------------
fmpi_halo_1d()
------------------------------------------------------------------------------*/
/**
 * Builds the halo buffer of a 1D partition.
 *
 * @param ctx     Context providing the MPI rank and process count.
 * @param data    Partition to extend (passed by value).
 * @param stencil Stencil; its `length` member sizes the halo.
 *
 * @return Data descriptor whose `raw` member is allocated with calloc().
 */
static struct fmpi_data fmpi_halo_1d(
const struct fmpi_ctx * ctx, struct fmpi_data data,
struct fmpi_stencil stencil
);
/*==============================================================================
PUBLIC FUNCTION DEFINITION
......@@ -61,12 +68,11 @@ struct fmpi_domain fmpi_new_domain(
const size_t proc_cnt = (size_t)ctx->mpi->size;
struct fmpi_domain domain = {
.data = data,
.parts = NULL,
.part_cnt = proc_cnt
};
domain.parts = fmpi_partition_block_1d(ctx, data, proc_cnt, stencil);
if(domain.parts == NULL) {
FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "fmpi_partition_block_1d() failed!");
domain.inner = fmpi_partition_block_1d(ctx, data);
if(stencil.type != FMPI_STENCIL_NONE) {
domain.halo = fmpi_halo_1d(ctx, domain.inner, stencil);
}
return domain;
}
......@@ -76,80 +82,64 @@ struct fmpi_domain fmpi_new_domain(
/*------------------------------------------------------------------------------
fmpi_partition_block_1d()
------------------------------------------------------------------------------*/
static struct fmpi_partition * fmpi_partition_block_1d(
const struct fmpi_ctx * const ctx, const struct fmpi_data * const data,
const size_t proc_cnt, const struct fmpi_stencil stencil
static struct fmpi_data fmpi_partition_block_1d(
const struct fmpi_ctx * const ctx, const struct fmpi_data * const data
) {
assert(ctx != NULL);
assert(data != NULL);
struct fmpi_partition * parts = malloc(proc_cnt * sizeof(*parts));
if(parts == NULL) {
FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "malloc(parts) failed!");
return NULL;
}
const struct fmpi_type data_type = data->type;
const size_t type_size = data_type.size;
const size_t data_cnt = data->cnt;
//! @todo Handle proc_cnt > data_cnt
const size_t cnt_per_proc = data_cnt/proc_cnt;
size_t rem = data_cnt % proc_cnt;
size_t offset = 0;
const size_t rank = (size_t)ctx->mpi->rank;
for(size_t i = 0; i < proc_cnt; i++) {
const size_t inner_cnt = (rem != 0) ? (cnt_per_proc + 1) : cnt_per_proc;
const size_t inner_size = inner_cnt * type_size;
if(i == rank) {
// Inner
struct fmpi_data inner = {
.type = data_type,
.cnt = inner_cnt,
.size = inner_size,
.dim_len = {inner_cnt, 1, 1},
.dim_cnt = 1,
.raw = (char *)data->raw + offset,
.gpu = NULL
};
parts[i].inner = inner;
// Halo
if(stencil.type != FMPI_STENCIL_NONE) {
const size_t halo_cnt = inner_cnt + 2;
const size_t halo_size = halo_cnt * type_size;
struct fmpi_data halo = {
.type = data_type,
.cnt = halo_cnt,
.size = halo_size,
.dim_len = {halo_cnt, 1, 1},
.dim_cnt = 1,
.raw = calloc(halo_cnt, type_size),
.gpu = NULL
};
if(halo.raw == NULL) {
FMPI_RAISE_ERROR(ctx->err_handler, "FMPI",
"calloc(halo.raw) failed!"
);
goto error;
}
// Left boundary
if(i == 0) {
memcpy((char*)halo.raw+type_size, inner.raw, inner_size+type_size);
// Right boundary
} else if(i == proc_cnt-1) {
memcpy(halo.raw, (char*)inner.raw-type_size, halo_size);
// Middle
} else {
memcpy(halo.raw, (char*)inner.raw-type_size, inner_size+type_size);
}
parts[i].halo = halo;
}
}
offset += inner_size;
rem = (rem != 0) ? (rem - 1) : rem;
const size_t cnt_per_proc = data->cnt/(size_t)ctx->mpi->size;
const size_t rem = data->cnt % (size_t)ctx->mpi->size;
const size_t cnt = (rank < rem) ? (cnt_per_proc + 1) : cnt_per_proc;
const size_t size = cnt * data->type.size;
const size_t offset = (rank * size) + rem;
return (struct fmpi_data){
.type = data->type,
.cnt = cnt,
.size = size,
.dim_len = {cnt, 1, 1},
.dim_cnt = 1,
.raw = (char *)data->raw + offset,
.gpu = NULL
};
}
/*------------------------------------------------------------------------------
fmpi_halo_1d()
------------------------------------------------------------------------------*/
/**
 * Builds a halo-extended copy of a rank's 1D partition.
 *
 * The returned buffer holds `data.cnt + 2 * stencil.length` elements laid out
 * as [left ghost cells | inner elements | right ghost cells]. Ghost cells are
 * filled with the elements stored directly before/after `data.raw`, so
 * `data.raw` must point into a buffer that also contains the neighbouring
 * ranks' elements. Rank 0 has no left neighbour and the last rank has no right
 * neighbour; the corresponding ghost cells keep the zero value written by
 * calloc().
 *
 * NOTE(review): assumes `stencil.length` is the halo width in elements on each
 * side (consistent with the `2 * stencil.length` sizing) and that each
 * neighbour holds at least that many elements -- confirm against callers.
 *
 * @param ctx     Context; `ctx->mpi->rank` and `ctx->mpi->size` are read and
 *                `ctx->err_handler` is used to report allocation failure.
 * @param data    Partition to extend (passed by value).
 * @param stencil Stencil whose `length` member sizes the halo.
 *
 * @return The halo data. Its `raw` member is allocated with calloc() and must
 *         be released with free() by the caller. On allocation failure an
 *         error is raised and `raw` is NULL.
 */
static struct fmpi_data fmpi_halo_1d(
    const struct fmpi_ctx * const ctx, const struct fmpi_data data,
    const struct fmpi_stencil stencil
){
    assert(ctx != NULL);
    const size_t type_size = data.type.size;
    const size_t ghost_cnt = (size_t)stencil.length;
    const size_t cnt = data.cnt + (2 * ghost_cnt);
    const size_t size = cnt * type_size;
    struct fmpi_data halo = {
        .type = data.type,
        .cnt = cnt,
        .size = size,
        .dim_len = {cnt, 1, 1},
        .dim_cnt = 1,
        .raw = calloc(cnt, type_size),
        .gpu = NULL
    };
    if(halo.raw == NULL) {
        FMPI_RAISE_ERROR(ctx->err_handler, "FMPI",
            "calloc(halo.raw) failed!"
        );
        // Do not fall through to memcpy() with a NULL destination.
        return halo;
    }
    const size_t rank = (size_t)ctx->mpi->rank;
    const size_t last_rank = (size_t)(ctx->mpi->size - 1);
    const size_t ghost_size = ghost_cnt * type_size;
    // Bytes borrowed from each neighbour; zero on a side with no neighbour.
    // Both are zero when there is a single process.
    const size_t left_size = (rank == 0) ? 0 : ghost_size;
    const size_t right_size = (rank == last_rank) ? 0 : ghost_size;
    // A single copy covers [left ghost | inner | right ghost]: the source
    // starts `left_size` bytes before the inner block, and the destination
    // skips the left ghost cells when there is nothing to put in them.
    memcpy(
        (char *)halo.raw + (ghost_size - left_size),
        (const char *)data.raw - left_size,
        left_size + data.size + right_size
    );
    return halo;
}
......@@ -58,16 +58,11 @@ struct fmpi_task fmpi_task_register_sync(
.args = *args,
.stencil = stencil
};
const size_t rank = (size_t)ctx->mpi->rank;
for(size_t i = 0; i < task.args.cnt; i++) {
task.domains[i] = fmpi_new_domain(ctx, &args->in[i], stencil);
if(task.domains[i].parts == NULL) {
FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "fmpi_new_domain() failed!");
continue;
}
const struct fmpi_data * const data = (stencil.type != FMPI_STENCIL_NONE)
? &task.domains[i].parts[rank].halo
: &task.domains[i].parts[rank].inner;
? &task.domains[i].halo
: &task.domains[i].inner;
//! @todo Could fmpi_futhark_new_data_async() be called here instead?
void * gpu_data = fmpi_futhark_new_data_sync(
ctx->fut, data->raw, data->type.base, data->dim_cnt,
......@@ -103,16 +98,11 @@ struct fmpi_task fmpi_task_register_async(
.args = *args,
.stencil = stencil
};
const size_t rank = (size_t)ctx->mpi->rank;
for(size_t i = 0; i < task.args.cnt; i++) {
task.domains[i] = fmpi_new_domain(ctx, &args->in[i], stencil);
if(task.domains[i].parts == NULL) {
FMPI_RAISE_ERROR(ctx->err_handler, "FMPI", "fmpi_new_domain() failed!");
continue;
}
const struct fmpi_data * const data = (stencil.type != FMPI_STENCIL_NONE)
? &task.domains[i].parts[rank].halo
: &task.domains[i].parts[rank].inner;
? &task.domains[i].halo
: &task.domains[i].inner;
void * gpu_data = fmpi_futhark_new_data_async(
ctx->fut, data->raw, data->type.base, data->dim_cnt,
data->dim_len[0], data->dim_len[1], data->dim_len[2]
......@@ -135,6 +125,7 @@ int fmpi_task_run_sync(
){
assert(ctx != NULL);
assert(task != NULL);
const int err_id = task->func(ctx, &task->args);
fmpi_futhark_sync(ctx->fut);
fmpi_futhark_check_error(ctx->fut, "task->func");
......@@ -195,7 +186,7 @@ int fmpi_task_finalize(
return err;
}
if(op == FMPI_TASK_OP_GATHER) {
const size_t cnt = task->domains[0].parts[ctx->mpi->rank].inner.cnt;
const size_t cnt = task->domains[0].inner.cnt;
MPI_Datatype type = fmpi_mpi_type(task->args.out.type.base);
const int err = fmpi_mpi_world_gather_in_place(
ctx->mpi, task->args.out.raw, type, cnt, cnt
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment